diff --git a/.github/actions/manylinux/action.yaml b/.github/actions/manylinux/action.yaml index 9dfae726..cd23f9c2 100644 --- a/.github/actions/manylinux/action.yaml +++ b/.github/actions/manylinux/action.yaml @@ -45,6 +45,7 @@ runs: uv pip install ${CARGO_TARGET_DIR}/wheels/orjson*.whl + export PYTHONMALLOC="debug" pytest -v test ./integration/run thread ./integration/run http diff --git a/.github/workflows/artifact.yaml b/.github/workflows/artifact.yaml index d48a4cc7..19225724 100644 --- a/.github/workflows/artifact.yaml +++ b/.github/workflows/artifact.yaml @@ -2,8 +2,9 @@ name: artifact on: push env: CARGO_UNSTABLE_SPARSE_REGISTRY: "true" + FORCE_COLOR: "1" PIP_DISABLE_PIP_VERSION_CHECK: "1" - RUST_TOOLCHAIN: "nightly-2025-01-07" + RUST_TOOLCHAIN: "nightly-2025-04-15" UNSAFE_PYO3_BUILD_FREE_THREADED: "1" UNSAFE_PYO3_SKIP_VERSION_CHECK: "1" UV_LINK_MODE: "copy" @@ -60,18 +61,19 @@ jobs: - run: ./integration/run typestubs - name: Store sdist - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: orjson_sdist path: dist overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 manylinux_amd64: runs-on: ubuntu-24.04 container: - image: fedora:42 + image: fedora:rawhide timeout-minutes: 10 strategy: fail-fast: false @@ -83,17 +85,19 @@ jobs: { interpreter: 'python3.11', compatibility: "manylinux_2_17", publish: true }, { interpreter: 'python3.10', compatibility: "manylinux_2_17", publish: true }, { interpreter: 'python3.9', compatibility: "manylinux_2_17", publish: true }, - { interpreter: 'python3.8', compatibility: "manylinux_2_17", publish: true }, ] env: CARGO_TARGET_DIR: "/tmp/orjson" CC: "clang" - CFLAGS: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm" + CFLAGS: "-O2 -fstrict-aliasing -fno-plt -emit-llvm" LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow" RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=4 -D warnings" VENV: ".venv" steps: + - name: CPU info + run: cat /proc/cpuinfo + - run: dnf install --setopt=install_weak_deps=false -y git - uses: actions/checkout@v4 @@ -103,12 +107,12 @@ jobs: with: arch: "x86_64" interpreter: "${{ matrix.python.interpreter }}" - features: "avx512,unstable-simd,yyjson" + features: "avx512,yyjson" compatibility: "${{ matrix.python.compatibility }}" publish: "${{ matrix.python.publish }}" - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/') && inputs.publish == true" + if: matrix.python.publish == true uses: actions/upload-artifact@v4 with: name: "orjson_manylinux_amd64_${{ matrix.python.interpreter }}_${{ matrix.python.compatibility }}" @@ -116,10 +120,21 @@ jobs: overwrite: true retention-days: 1 + - name: Debug + env: + CARGO_TARGET_DIR: "/tmp/orjson" + ORJSON_FEATURES: "avx512,yyjson" + PYTHON: "${{ matrix.python.interpreter }}" + TARGET: "x86_64-unknown-linux-gnu" + run: | + export PATH="$PWD/.venv:$HOME/.cargo/bin:$PATH" + source .venv/bin/activate + script/debug + manylinux_aarch64: runs-on: ubuntu-24.04-arm container: - image: fedora:42 + image: fedora:rawhide timeout-minutes: 10 strategy: fail-fast: false @@ -131,17 +146,19 @@ jobs: { interpreter: 'python3.11', compatibility: "manylinux_2_17", publish: true }, { interpreter: 'python3.10', compatibility: "manylinux_2_17", publish: true }, { interpreter: 'python3.9', compatibility: "manylinux_2_17", publish: true }, - { interpreter: 'python3.8', compatibility: "manylinux_2_17", 
publish: true }, ] env: CARGO_TARGET_DIR: "/tmp/orjson" CC: "clang" - CFLAGS: "-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm" + CFLAGS: "-O2 -fstrict-aliasing -fno-plt -emit-llvm" LDFLAGS: "-fuse-ld=lld -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow" RUSTFLAGS: "-C linker=clang -C link-arg=-fuse-ld=lld -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=4 -D warnings" VENV: ".venv" steps: + - name: CPU info + run: cat /proc/cpuinfo + - run: dnf install --setopt=install_weak_deps=false -y git - uses: actions/checkout@v4 @@ -151,12 +168,12 @@ jobs: with: arch: "aarch64" interpreter: "${{ matrix.python.interpreter }}" - features: "unstable-simd,yyjson" + features: "generic_simd,yyjson" compatibility: "${{ matrix.python.compatibility }}" publish: "${{ matrix.python.publish }}" - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/') && inputs.publish == true" + if: matrix.python.publish == true uses: actions/upload-artifact@v4 with: name: "orjson_manylinux_aarch64_${{ matrix.python.interpreter }}_${{ matrix.python.compatibility }}" @@ -164,9 +181,20 @@ jobs: overwrite: true retention-days: 1 + - name: Debug + env: + CARGO_TARGET_DIR: "/tmp/orjson" + ORJSON_FEATURES: "generic_simd,yyjson" + PYTHON: "${{ matrix.python.interpreter }}" + TARGET: "aarch64-unknown-linux-gnu" + run: | + export PATH="$PWD/.venv:$HOME/.cargo/bin:$PATH" + source .venv/bin/activate + script/debug + manylinux_cross: runs-on: ubuntu-24.04 - timeout-minutes: 20 + timeout-minutes: 10 strategy: fail-fast: false matrix: @@ -176,33 +204,32 @@ jobs: { interpreter: 'python3.11', abi: 'cp311-cp311' }, { interpreter: 'python3.10', abi: 'cp310-cp310' }, { interpreter: 'python3.9', abi: 'cp39-cp39' }, - { interpreter: 'python3.8', abi: 'cp38-cp38' }, ] target: [ { arch: 'i686', - cflags: '-Os -flto -fstrict-aliasing', - features: 'unstable-simd,yyjson', + cflags: '-Os -fstrict-aliasing', + features: 'yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings', target: 'i686-unknown-linux-gnu', }, { arch: 'armv7', - cflags: '-Os -flto=full -fstrict-aliasing', - features: 'yyjson', # no SIMD + cflags: '-Os -fstrict-aliasing', + features: 'yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings -C opt-level=s', target: 'armv7-unknown-linux-gnueabihf', }, { arch: 'ppc64le', - cflags: '-Os -flto=full -fstrict-aliasing', - features: 'unstable-simd,yyjson', + cflags: '-Os -fstrict-aliasing', + features: 'generic_simd,yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings', target: 'powerpc64le-unknown-linux-gnu', }, { arch: 's390x', - cflags: '-Os -flto=full -fstrict-aliasing -march=z10', + cflags: '-Os -fstrict-aliasing -march=z10', features: 'yyjson', rustflags: '-Z mir-opt-level=4 -C lto=fat -D warnings -C target-cpu=z10', target: 's390x-unknown-linux-gnu', @@ -227,17 +254,18 @@ jobs: target: "${{ matrix.target.target }}" rust-toolchain: "${{ env.RUST_TOOLCHAIN }}" rustup-components: rust-src - manylinux: auto + manylinux: manylinux_2_17 args: --release --strip --out=dist --features=${{ matrix.target.features }} -i ${{ matrix.python.interpreter }} - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: "orjson_manylinux_${{ matrix.target.arch }}_${{ matrix.python.interpreter }}" path: dist overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 musllinux_amd64: runs-on: ubuntu-24.04 @@ -246,22 +274,21 @@ jobs: fail-fast: false matrix: python: [ - { version: '3.13' 
}, - { version: '3.12' }, - { version: '3.11' }, - { version: '3.10' }, - { version: '3.9' }, - { version: '3.8' }, + { version: '3.13', pytest: '1' }, + { version: '3.12', pytest: '1' }, + { version: '3.11', pytest: '0' }, + { version: '3.10', pytest: '0' }, + { version: '3.9', pytest: '0' }, ] platform: - target: x86_64-unknown-linux-musl arch: x86_64 platform: linux/amd64 - features: avx512,unstable-simd,unwind,yyjson + features: avx512,unwind,yyjson - target: i686-unknown-linux-musl arch: i686 platform: linux/386 - features: unstable-simd,unwind,yyjson + features: unwind,yyjson steps: - uses: actions/checkout@v4 @@ -274,7 +301,7 @@ jobs: uses: PyO3/maturin-action@v1 env: CC: "gcc" - CFLAGS: "-Os" + CFLAGS: "-O2" LDFLAGS: "-Wl,--as-needed" RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -Z threads=2 -D warnings -C target-feature=-crt-static" with: @@ -284,17 +311,10 @@ jobs: manylinux: musllinux_1_2 args: --release --strip --out=dist --features=${{ matrix.platform.features }} -i python${{ matrix.python.version }} - - name: QEMU - if: matrix.platform.arch != 'x86_64' - uses: docker/setup-qemu-action@v3 - with: - image: tonistiigi/binfmt:qemu-v8.1.5 - platforms: ${{ matrix.platform.platform }} - - name: Test uses: addnab/docker-run-action@v3 with: - image: quay.io/pypa/musllinux_1_2_${{ matrix.platform.arch }}:latest + image: "quay.io/pypa/musllinux_1_2_${{ matrix.platform.arch }}:2025.04.19-1" options: -v ${{ github.workspace }}:/io -w /io run: | apk add tzdata @@ -303,18 +323,23 @@ jobs: python${{ matrix.python.version }} -m venv venv venv/bin/pip install -U pip wheel - venv/bin/pip install -r test/requirements.txt venv/bin/pip install orjson --no-index --find-links dist/ --force-reinstall - venv/bin/python -m pytest -v test + + # segfault on starting pytest after January 2025 on 3.11 and older; artifact works fine + if [ ${{ matrix.python.pytest }} == '1' ]; then + venv/bin/pip install -r test/requirements.txt + PYTHONMALLOC="debug" venv/bin/python -m pytest -v test + fi - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: orjson_musllinux_${{ matrix.platform.arch }}_${{ matrix.python.version }} path: dist overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 musllinux_aarch64: runs-on: ubuntu-24.04-arm @@ -328,17 +353,16 @@ jobs: { version: '3.11' }, { version: '3.10' }, { version: '3.9' }, - { version: '3.8' }, ] platform: - target: aarch64-unknown-linux-musl arch: aarch64 platform: linux/arm64 - features: unstable-simd,unwind,yyjson + features: generic_simd,unwind,yyjson - target: armv7-unknown-linux-musleabihf arch: armv7l platform: linux/arm/v7 - features: unstable-simd,yyjson + features: yyjson steps: - uses: actions/checkout@v4 @@ -351,7 +375,7 @@ jobs: uses: PyO3/maturin-action@v1 env: CC: "gcc" - CFLAGS: "-Os" + CFLAGS: "-O2" LDFLAGS: "-Wl,--as-needed" RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -Z threads=2 -D warnings -C target-feature=-crt-static" with: @@ -364,7 +388,7 @@ jobs: - name: Test uses: addnab/docker-run-action@v3 with: - image: quay.io/pypa/musllinux_1_2_${{ matrix.platform.arch }}:latest + image: "quay.io/pypa/musllinux_1_2_${{ matrix.platform.arch }}:2025.04.19-1" options: -v ${{ github.workspace }}:/io -w /io run: | apk add tzdata @@ -375,18 +399,97 @@ jobs: venv/bin/pip install -U pip wheel venv/bin/pip install -r test/requirements.txt venv/bin/pip install orjson --no-index --find-links dist/ --force-reinstall + export PYTHONMALLOC="debug" venv/bin/python -m pytest -v test - 
name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: orjson_musllinux_${{ matrix.platform.arch }}_${{ matrix.python.version }} path: dist overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 macos_aarch64: + runs-on: macos-15 + timeout-minutes: 10 + strategy: + fail-fast: false + matrix: + python: [ + { version: '3.13', macosx_target: "15.0" }, + { version: '3.12', macosx_target: "15.0" }, + { version: '3.11', macosx_target: "15.0" }, + ] + env: + CC: "clang" + LDFLAGS: "-Wl,--as-needed" + CFLAGS: "-O2 -fstrict-aliasing -fno-plt -mcpu=apple-m1 -mtune=generic" + RUSTFLAGS: "-Z mir-opt-level=4 -Z threads=3 -D warnings" + PATH: "/Users/runner/work/orjson/orjson/.venv/bin:/Users/runner/.cargo/bin:/usr/local/opt/curl/bin:/usr/local/bin:/usr/local/sbin:/Users/runner/bin:/Library/Frameworks/Python.framework/Versions/Current/bin:/usr/bin:/bin:/usr/sbin:/sbin" + steps: + + - name: CPU info + run: sysctl -a | grep brand + + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "${{ matrix.python.version }}" + + - uses: dtolnay/rust-toolchain@master + with: + toolchain: "${{ env.RUST_TOOLCHAIN }}" + targets: "aarch64-apple-darwin" + components: "rust-src" + + - name: Build environment + run: | + cargo fetch --target aarch64-apple-darwin & + + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + + curl -LsSf https://astral.sh/uv/install.sh | sh + uv venv --python python${{ matrix.python.version }} + uv pip install --upgrade "maturin>=1,<2" -r test/requirements.txt -r integration/requirements.txt + + mkdir .cargo + cp ci/config.toml .cargo/config.toml + + - name: maturin + run: | + export PATH=$HOME/.cargo/bin:$HOME/.local/bin:$PATH + + MACOSX_DEPLOYMENT_TARGET="${{ matrix.python.macosx_target }}" \ + PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ + maturin build --release --strip \ + --features=generic_simd,yyjson \ + --interpreter python${{ matrix.python.version }} \ + --target=aarch64-apple-darwin + uv pip install target/wheels/orjson*.whl + + - run: pytest -v test + env: + PYTHONMALLOC: "debug" + + - run: source .venv/bin/activate && ./integration/run thread + - run: source .venv/bin/activate && ./integration/run http + - run: source .venv/bin/activate && ./integration/run init + + - name: Store wheels + uses: actions/upload-artifact@v4 + with: + name: orjson_macos_aarch64_${{ matrix.python.version }} + path: target/wheels + overwrite: true + retention-days: 1 + if-no-files-found: "error" + compression-level: 0 + + macos_universal2_aarch64: runs-on: macos-15 timeout-minutes: 10 strategy: @@ -400,14 +503,17 @@ jobs: ] env: CC: "clang" - CFLAGS: "-Os -fstrict-aliasing -flto=full" + CFLAGS: "-O2 -fstrict-aliasing" LDFLAGS: "-Wl,--as-needed" - CFLAGS_x86_64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -march=x86-64-v2 -mtune=generic" - CFLAGS_aarch64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -mcpu=apple-m1 -mtune=generic" + CFLAGS_x86_64_apple_darwin: "-O2 -fstrict-aliasing -fno-plt -march=x86-64-v2 -mtune=generic" + CFLAGS_aarch64_apple_darwin: "-O2 -fstrict-aliasing -fno-plt -mcpu=apple-m1 -mtune=generic" RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -Z threads=3 -D warnings" PATH: "/Users/runner/work/orjson/orjson/.venv/bin:/Users/runner/.cargo/bin:/usr/local/opt/curl/bin:/usr/local/bin:/usr/local/sbin:/Users/runner/bin:/Library/Frameworks/Python.framework/Versions/Current/bin:/usr/bin:/bin:/usr/sbin:/sbin" steps: + - name: 
CPU info + run: sysctl -a | grep brand + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -440,7 +546,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET="${{ matrix.python.macosx_target }}" \ PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ maturin build --release --strip \ - --features=unstable-simd,yyjson \ + --features=generic_simd,yyjson \ --interpreter python${{ matrix.python.version }} \ --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl @@ -454,15 +560,16 @@ jobs: - run: source .venv/bin/activate && ./integration/run init - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: orjson_universal2_aarch64_${{ matrix.python.version }} path: target/wheels overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 - macos_amd64: + macos_universal2_amd64: runs-on: macos-13 timeout-minutes: 10 strategy: @@ -470,18 +577,20 @@ jobs: matrix: python: [ { version: '3.9', macosx_target: "10.15" }, - { version: '3.8', macosx_target: "10.15" }, ] env: CC: "clang" - CFLAGS: "-Os -fstrict-aliasing -flto=full" + CFLAGS: "-O2 -fstrict-aliasing" LDFLAGS: "-Wl,--as-needed" - CFLAGS_x86_64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -march=x86-64-v2 -mtune=generic" - CFLAGS_aarch64_apple_darwin: "-O2 -fstrict-aliasing -flto=full -mcpu=apple-m1 -mtune=generic" + CFLAGS_x86_64_apple_darwin: "-O2 -fstrict-aliasing -fno-plt -march=x86-64-v2 -mtune=generic" + CFLAGS_aarch64_apple_darwin: "-O2 -fstrict-aliasing -fno-plt -mcpu=apple-m1 -mtune=generic" RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -Z threads=3 -D warnings" PATH: "/Users/runner/work/orjson/orjson/.venv/bin:/Users/runner/.cargo/bin:/usr/local/opt/curl/bin:/usr/local/bin:/usr/local/sbin:/Users/runner/bin:/Library/Frameworks/Python.framework/Versions/Current/bin:/usr/bin:/bin:/usr/sbin:/sbin" steps: + - name: CPU info + run: sysctl -a | grep brand + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -514,7 +623,7 @@ jobs: MACOSX_DEPLOYMENT_TARGET="${{ matrix.python.macosx_target }}" \ PYO3_CROSS_LIB_DIR=$(python -c "import sysconfig;print(sysconfig.get_config_var('LIBDIR'))") \ maturin build --release --strip \ - --features=unstable-simd,yyjson \ + --features=generic_simd,yyjson \ --interpreter python${{ matrix.python.version }} \ --target=universal2-apple-darwin uv pip install target/wheels/orjson*.whl @@ -528,16 +637,17 @@ jobs: - run: source .venv/bin/activate && ./integration/run init - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: name: orjson_universal2_amd64_${{ matrix.python.version }} path: target/wheels overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 - windows: - runs-on: windows-2022 + windows_amd64: + runs-on: windows-2025 timeout-minutes: 10 strategy: fail-fast: false @@ -548,18 +658,21 @@ jobs: { version: '3.11' }, { version: '3.10' }, { version: '3.9' }, - { version: '3.8' }, ] platform: [ - { arch: "x64", target: "x86_64-pc-windows-msvc" }, - { arch: "x86", target: "i686-pc-windows-msvc" }, + { arch: "x64", target: "x86_64-pc-windows-msvc", features: "avx512,yyjson" }, + { arch: "x86", target: "i686-pc-windows-msvc", features: "yyjson" }, ] env: - CFLAGS: "-Os" + CFLAGS: "-O2" LDFLAGS: "-Wl,--as-needed" RUSTFLAGS: "-C lto=fat -Z mir-opt-level=4 -D warnings" steps: + - name: CPU info + shell: pwsh + run: Get-WmiObject -Class Win32_Processor -ComputerName. 
| Select-Object -Property Name, NumberOfCores, NumberOfLogicalProcessors + - uses: actions/checkout@v4 - uses: actions/setup-python@v5 @@ -578,14 +691,81 @@ jobs: cargo fetch --target "${{ matrix.platform.target }}" & python.exe -m pip install --upgrade pip "maturin>=1,<2" wheel - python.exe -m pip install -r test\requirements.txt -r integration\requirements.txt + python.exe -m pip install -r test\requirements.txt + + mkdir .cargo + cp ci\config.toml .cargo\config.toml + + - name: maturin + run: | + maturin.exe build --release --strip --features="${{ matrix.platform.features }}" --target="${{ matrix.platform.target }}" + python.exe -m pip install orjson --no-index --find-links target\wheels + + - run: python.exe -m pytest -s -rxX -v test + env: + PYTHONMALLOC: "debug" + + - name: Store wheels + uses: actions/upload-artifact@v4 + with: + name: orjson_windows_amd64_${{ matrix.platform.arch }}_${{ matrix.python.version }} + path: target\wheels + overwrite: true + retention-days: 1 + if-no-files-found: "error" + compression-level: 0 + + windows_aarch64: + runs-on: windows-11-arm + timeout-minutes: 10 + strategy: + fail-fast: false + matrix: + python: [ + { version: '3.13' }, + { version: '3.12' }, + { version: '3.11' }, + ] + env: + CFLAGS: "-O2" + LDFLAGS: "-Wl,--as-needed" + RUSTFLAGS: "-Z mir-opt-level=4 -D warnings" + TARGET: "aarch64-pc-windows-msvc" + steps: + + - name: CPU info + shell: pwsh + run: Get-WmiObject -Class Win32_Processor -ComputerName. | Select-Object -Property Name, NumberOfCores, NumberOfLogicalProcessors + + - uses: actions/checkout@v4 + + - uses: actions/setup-python@v5 + with: + python-version: "${{ matrix.python.version }}" + architecture: "arm64" + + # from maturin + - shell: pwsh + run: | + Invoke-WebRequest -Uri "https://static.rust-lang.org/rustup/dist/$env:TARGET/rustup-init.exe" -OutFile rustup-init.exe + .\rustup-init.exe --default-toolchain "$env:RUST_TOOLCHAIN-$env:TARGET" --profile minimal --component rust-src -y + "$env:USERPROFILE\.cargo\bin" | Out-File -Append -Encoding ascii $env:GITHUB_PATH + "CARGO_HOME=$env:USERPROFILE\.cargo" | Out-File -Append -Encoding ascii $env:GITHUB_ENV + + - name: Build environment + run: | + cargo fetch --target "$" & + + python.exe -m sysconfig + python.exe -m pip install --upgrade pip "maturin>=1,<2" wheel + python.exe -m pip install -r test\requirements.txt mkdir .cargo cp ci\config.toml .cargo\config.toml - name: maturin run: | - maturin.exe build --release --strip --features=unstable-simd,yyjson --target="${{ matrix.platform.target }}" + maturin.exe build --release --strip --features=generic_simd --target="$env:TARGET" python.exe -m pip install orjson --no-index --find-links target\wheels - run: python.exe -m pytest -s -rxX -v test @@ -593,36 +773,40 @@ jobs: PYTHONMALLOC: "debug" - name: Store wheels - if: "startsWith(github.ref, 'refs/tags/')" uses: actions/upload-artifact@v4 with: - name: orjson_windows_${{ matrix.platform.arch }}_${{ matrix.python.version }} + name: orjson_windows_aarch64_${{ matrix.python.version }} path: target\wheels overwrite: true retention-days: 1 + if-no-files-found: "error" + compression-level: 0 pypi: name: PyPI runs-on: ubuntu-24.04 timeout-minutes: 10 - if: "startsWith(github.ref, 'refs/tags/')" needs: [ macos_aarch64, - macos_amd64, + macos_universal2_aarch64, + macos_universal2_amd64, manylinux_aarch64, manylinux_amd64, manylinux_cross, musllinux_aarch64, musllinux_amd64, sdist, - windows, + windows_aarch64, + windows_amd64, ] environment: - name: pypi + name: PyPI url: 
https://pypi.org/p/orjson permissions: id-token: write steps: + - uses: actions/checkout@v4 + - uses: actions/download-artifact@v4 with: merge-multiple: true @@ -633,9 +817,12 @@ jobs: - uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.13" + + - run: ./script/check-pypi dist - name: Publish distribution to PyPI + if: "startsWith(github.ref, 'refs/tags/')" uses: pypa/gh-action-pypi-publish@release/v1 with: attestations: true diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index e97ed4b5..c2c90a06 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -1,14 +1,17 @@ name: lint on: push +env: + FORCE_COLOR: "1" + PIP_DISABLE_PIP_VERSION_CHECK: "1" jobs: lint: runs-on: ubuntu-24.04 steps: - uses: actions/setup-python@v5 with: - python-version: "3.12" + python-version: "3.13" - run: curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=default -y - - run: pip install -U ruff==0.7.1 mypy==1.13.0 + - run: pip install -U "ruff>=0.11,<0.12" "mypy==1.15.0" - uses: actions/checkout@v4 diff --git a/.github/workflows/debug.yaml b/.github/workflows/unusual.yaml similarity index 78% rename from .github/workflows/debug.yaml rename to .github/workflows/unusual.yaml index 0111dd40..dc9adda2 100644 --- a/.github/workflows/debug.yaml +++ b/.github/workflows/unusual.yaml @@ -1,26 +1,27 @@ -name: debug +name: unusual on: push +env: + CARGO_UNSTABLE_SPARSE_REGISTRY: "true" + FORCE_COLOR: "1" + PIP_DISABLE_PIP_VERSION_CHECK: "1" jobs: - debug: - runs-on: ubuntu-22.04 + unusual: + runs-on: ubuntu-24.04 strategy: fail-fast: false matrix: profile: [ { rust: "1.82", features: "" }, - { rust: "1.82", features: "--features=yyjson" }, - { rust: "nightly-2025-01-07", features: "--features=avx512,yyjson,unstable-simd" }, + { rust: "nightly-2025-04-15", features: "--features=generic_simd" }, ] python: [ { version: '3.13' }, { version: '3.12' }, - { version: '3.8' }, + { version: '3.9' }, ] env: - CC: "gcc" - CFLAGS: "-Os -fstrict-aliasing" - LDFLAGS: "-Wl,--as-needed" + ORJSON_DISABLE_YYJSON: "1" CARGO_UNSTABLE_SPARSE_REGISTRY: "true" UNSAFE_PYO3_SKIP_VERSION_CHECK: "1" steps: @@ -54,7 +55,6 @@ jobs: timeout-minutes: 2 - run: ./integration/run http - if: matrix.python.version != '3.13' # alpha 5 httpx type issue timeout-minutes: 2 - run: ./integration/run init diff --git a/CHANGELOG.md b/CHANGELOG.md index 34f7f14a..dd2910d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # Changelog +## 3.10.18 + +### Fixed + +- Fix incorrect escaping of the vertical tabulation character. This was +introduced in 3.10.17. + + +## 3.10.17 + +### Changed + +- Publish PyPI Windows aarch64/arm64 wheels. +- ABI compatibility with CPython 3.14 alpha 7. +- Fix incompatibility running on Python 3.13 using WASM. + + +## 3.10.16 + +### Changed + +- Improve performance of serialization on amd64 machines with AVX-512. +- ABI compatibility with CPython 3.14 alpha 6. +- Drop support for Python 3.8. +- Publish additional PyPI wheels for macOS that target only aarch64, macOS 15, +and recent Python. 
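A minimal round-trip check of the vertical-tab fix noted in the 3.10.18 entry above, as an illustrative sketch that assumes only an installed `orjson` and the standard library:

```python
import json

import orjson

# U+000B (vertical tab) has no short escape in JSON, so a correct writer must
# emit the six-character sequence \u000b; 3.10.17 briefly escaped it incorrectly.
payload = orjson.dumps("\u000b")

# The output round-trips through orjson and parses as valid JSON in the stdlib.
assert orjson.loads(payload) == "\u000b"
assert json.loads(payload) == "\u000b"
```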
+ ## 3.10.15 diff --git a/Cargo.lock b/Cargo.lock index a198f82e..03fdf346 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,9 +25,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.1" +version = "1.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd9de9f2205d5ef3fd67e685b0df337994ddd4495e2a28d185500d0e1edfea47" +checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" dependencies = [ "shlex", ] @@ -40,9 +40,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "compact_str" -version = "0.8.1" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b79c4069c6cad78e2e0cdfcbd26275770669fb39fd308a752dc110e83b9af32" +checksum = "3fdb1325a1cece981e8a296ab8f0f9b63ae357bd0784a9faaf548cc7b480707a" dependencies = [ "castaway", "cfg-if", @@ -55,9 +55,9 @@ dependencies = [ [[package]] name = "crunchy" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" +checksum = "43da5946c66ffcc7745f48db692ffbb10a83bfe0afd96235c5c2a4fb23994929" [[package]] name = "encoding_rs" @@ -76,9 +76,9 @@ checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" [[package]] name = "half" -version = "2.4.1" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd08c532ae367adf81c312a4580bc67f1d0fe8bc9c460520283f4c0ff277888" +checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" dependencies = [ "cfg-if", "crunchy", @@ -86,9 +86,9 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.14" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] name = "itoap" @@ -98,20 +98,32 @@ checksum = "9028f49264629065d057f340a86acb84867925865f73bbf8d47b4d149a7e88b8" [[package]] name = "jiff" -version = "0.1.21" +version = "0.2.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed0ce60560149333a8e41ca7dc78799c47c5fd435e2bc18faf6a054382eec037" +checksum = "5a064218214dc6a10fbae5ec5fa888d80c45d611aba169222fc272072bf7aef6" dependencies = [ + "jiff-static", "portable-atomic", "portable-atomic-util", "serde", ] +[[package]] +name = "jiff-static" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "199b7932d97e325aff3a7030e141eafe7f2c6268e1d1b24859b753a627f45254" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "libc" -version = "0.2.169" +version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a" +checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "memchr" @@ -121,13 +133,13 @@ checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" [[package]] name = "once_cell" -version = "1.20.2" +version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" +checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "orjson" -version = "3.10.15" +version = "3.10.18" dependencies 
= [ "associative-cache", "bytecount", @@ -154,9 +166,9 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" +checksum = "350e9b48cbc6b0e028b0473b114454c6316e57336ee184ceab6e53f72c178b3e" [[package]] name = "portable-atomic-util" @@ -169,9 +181,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.92" +version = "1.0.95" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" +checksum = "02b3e5e68a3a1a02aad3ec490a98007cbc13c37cbe84a3cd7b8e406d76e7f778" dependencies = [ "unicode-ident", ] @@ -194,39 +206,39 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.38" +version = "1.0.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" +checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" dependencies = [ "proc-macro2", ] [[package]] name = "rustversion" -version = "1.0.19" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7c45b9784283f1b2e7fb61b42047c2fd678ef0960d4f6f1eba131594cc369d4" +checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" [[package]] name = "ryu" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" [[package]] name = "serde" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02fc4265df13d6fa1d00ecff087228cc0a2b5f3c0e87e258d8b94a156e984c70" +checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.217" +version = "1.0.219" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a9bf7cf98d04a2b28aead066b7496853d4779c9cc183c440dbac457641e19a0" +checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", @@ -235,9 +247,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.135" +version = "1.0.140" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b0d7ba2887406110130a978386c4e1befb98c674b4fba677954e4db976630d9" +checksum = "20068b6e96dc6c9bd23e01df8827e6c7e1f2fddd43c21810382803c136b99373" dependencies = [ "itoa", "memchr", @@ -259,9 +271,9 @@ checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" [[package]] name = "smallvec" -version = "1.13.2" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" [[package]] name = "static_assertions" @@ -271,9 +283,9 @@ checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] name = "syn" -version = "2.0.95" +version = "2.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a" +checksum = 
"8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" dependencies = [ "proc-macro2", "quote", @@ -282,15 +294,15 @@ dependencies = [ [[package]] name = "target-lexicon" -version = "0.13.1" +version = "0.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc12939a1c9b9d391e0b7135f72fd30508b73450753e28341fed159317582a77" +checksum = "e502f78cdbb8ba4718f566c418c52bc729126ffd16baee5baa718cf25dd5a69a" [[package]] name = "unicode-ident" -version = "1.0.14" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" +checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" [[package]] name = "unwinding" @@ -303,9 +315,9 @@ dependencies = [ [[package]] name = "uuid" -version = "1.11.0" +version = "1.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8c5f0a0af699448548ad1a2fbf920fb4bee257eae39953ba95cb84891a0446a" +checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" [[package]] name = "version_check" diff --git a/Cargo.toml b/Cargo.toml index bd3cf92a..7b0d8a86 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "orjson" -version = "3.10.15" +version = "3.10.18" authors = ["ijl "] description = "Fast, correct Python JSON library supporting dataclasses, datetimes, and numpy" edition = "2021" @@ -30,10 +30,7 @@ crate-type = ["cdylib"] [features] default = [] -# Use SIMD intrinsics. This requires Rust on the nightly channel. -unstable-simd = [] - -# Include runtime-detected functions that use AVX512VL. Requires unstable-simd and amd64. +# Include runtime-detected functions that use AVX512VL. avx512 = [] # Avoid bundling libgcc on musl. @@ -44,6 +41,7 @@ unwind = ["unwinding"] yyjson = [] # Features detected by build.rs. Do not specify. 
+generic_simd = [] inline_int = [] intrinsics = [] optimize = [] @@ -51,12 +49,12 @@ optimize = [] [dependencies] associative-cache = { version = "2", default-features = false } bytecount = { version = "^0.6.7", default-features = false, features = ["runtime-dispatch-simd"] } -compact_str = { version = "0.8", default-features = false, features = ["serde"] } +compact_str = { version = "0.9", default-features = false, features = ["serde"] } encoding_rs = { version = "0.8", default-features = false } half = { version = "2", default-features = false, features = ["std"] } itoa = { version = "1", default-features = false } itoap = { version = "1", default-features = false, features = ["std", "simd"] } -jiff = { version = "^0.1", default-features = false, features = ["alloc"] } +jiff = { version = "^0.2", default-features = false, features = ["alloc"] } once_cell = { version = "1", default-features = false, features = ["alloc", "race"] } pyo3-ffi = { path = "include/pyo3/pyo3-ffi", default-features = false, features = ["extension-module"]} ryu = { version = "1", default-features = false } @@ -64,12 +62,12 @@ serde = { version = "1", default-features = false } serde_json = { version = "1", default-features = false, features = ["std", "float_roundtrip"] } simdutf8 = { version = "0.1", default-features = false, features = ["std", "public_imp", "aarch64_neon"] } smallvec = { version = "^1.11", default-features = false, features = ["union", "write"] } -unwinding = { version = "^0.2.4", default-features = false, features = ["unwinder"], optional = true } +unwinding = { version = "=0.2.5", default-features = false, features = ["unwinder"], optional = true } uuid = { version = "1", default-features = false } xxhash-rust = { version = "^0.8", default-features = false, features = ["xxh3"] } [build-dependencies] -cc = { version = "=1.2.1" } # aarch64-linux-musl link errors +cc = { version = "1" } pyo3-build-config = { path = "include/pyo3/pyo3-build-config" } version_check = { version = "0.9" } diff --git a/README.md b/README.md index 63fc3776..543e85fe 100644 --- a/README.md +++ b/README.md @@ -21,15 +21,17 @@ RFC 8259 ("The JavaScript Object Notation (JSON) Data Interchange Format"). Reading from and writing to files, line-delimited JSON files, and so on is not provided by the library. -orjson supports CPython 3.8, 3.9, 3.10, 3.11, 3.12, 3.13, and 3.14. +orjson supports CPython 3.9, 3.10, 3.11, 3.12, 3.13, and 3.14. -It distributes amd64/x86_64, i686/x86, aarch64/armv8, arm7, POWER/ppc64le, -and s390x wheels for Linux, amd64 and aarch64 wheels for macOS, and amd64 -and i686/x86 wheels for Windows. +It distributes amd64/x86_64/x64, i686/x86, aarch64/arm64/armv8, arm7, +ppc64le/POWER8, and s390x wheels for Linux, amd64 and aarch64 wheels +for macOS, and amd64, i686, and aarch64 wheels for Windows. orjson does not and will not support PyPy, embedded Python builds for Android/iOS, or PEP 554 subinterpreters. +orjson may support PEP 703 free-threading when it is stable. + Releases follow semantic versioning and serializing a new object type without an opt-in flag is considered a breaking change. @@ -1078,7 +1080,7 @@ It benefits from also having a C build environment to compile a faster deserialization backend. See this project's `manylinux_2_28` builds for an example using clang and LTO. -The project's own CI tests against `nightly-2025-01-07` and stable 1.72. It +The project's own CI tests against `nightly-2025-04-15` and stable 1.82. 
It is prudent to pin the nightly version because that channel can introduce breaking changes. There is a significant performance benefit to using nightly. @@ -1094,10 +1096,11 @@ The source distribution on PyPI contains all dependencies' source and can be built without network access. The file can be downloaded from `https://files.pythonhosted.org/packages/source/o/orjson/orjson-${version}.tar.gz`. -orjson's tests are included in the source distribution on PyPI. The -requirements to run the tests are specified in `test/requirements.txt`. The -tests should be run as part of the build. It can be run with -`pytest -q test`. +orjson's tests are included in the source distribution on PyPI. The tests +require only `pytest`. There are optional packages such as `pytz` and `numpy` +listed in `test/requirements.txt` and used in ~10% of tests. Not having these +dependencies causes the tests needing them to skip. Tests can be run +with `pytest -q test`. ## License diff --git a/bench/requirements.txt b/bench/requirements.txt index cd245e70..e7710b74 100644 --- a/bench/requirements.txt +++ b/bench/requirements.txt @@ -1,6 +1,6 @@ -memory-profiler; python_version<"3.13" -pandas; python_version<"3.13" +memory-profiler; python_version<"3.14" +pandas; python_version<"3.14" pytest-benchmark pytest-random-order -seaborn; python_version<"3.13" +seaborn; python_version<"3.14" tabulate diff --git a/build.rs b/build.rs index 1de74831..43538c0c 100644 --- a/build.rs +++ b/build.rs @@ -19,7 +19,6 @@ fn main() { println!("cargo:rustc-check-cfg=cfg(Py_3_12)"); println!("cargo:rustc-check-cfg=cfg(Py_3_13)"); println!("cargo:rustc-check-cfg=cfg(Py_3_14)"); - println!("cargo:rustc-check-cfg=cfg(Py_3_8)"); println!("cargo:rustc-check-cfg=cfg(Py_3_9)"); println!("cargo:rustc-check-cfg=cfg(Py_GIL_DISABLED)"); @@ -28,6 +27,9 @@ fn main() { println!("{cfg}"); } + #[allow(unused_variables)] + let is_64_bit_python = matches!(python_config.pointer_width, Some(64)); + if let Some(true) = version_check::supports_feature("core_intrinsics") { println!("cargo:rustc-cfg=feature=\"intrinsics\""); } @@ -37,22 +39,7 @@ fn main() { } #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - if env::var("ORJSON_DISABLE_SIMD").is_err() { - // auto build unstable SIMD on nightly - if let Some(true) = version_check::supports_feature("portable_simd") { - println!("cargo:rustc-cfg=feature=\"unstable-simd\""); - } - // auto build AVX512 on x86-64-v4 or supporting native targets - #[cfg(all(target_arch = "x86_64", target_feature = "avx512vl"))] - if let Some(true) = version_check::supports_feature("stdarch_x86_avx512") { - if env::var("ORJSON_DISABLE_AVX512").is_err() { - println!("cargo:rustc-cfg=feature=\"avx512\""); - } - } - } - - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - if matches!(python_config.pointer_width, Some(64)) { + if is_64_bit_python { println!("cargo:rustc-cfg=feature=\"inline_int\""); } diff --git a/ci/config.toml b/ci/config.toml index 0823eb5a..8039ce14 100644 --- a/ci/config.toml +++ b/ci/config.toml @@ -1,6 +1,7 @@ [unstable] build-std = ["core", "std", "alloc", "proc_macro", "panic_abort"] build-std-features = ["panic_immediate_abort"] +trim-paths = true [target.x86_64-apple-darwin] linker = "clang" @@ -8,4 +9,4 @@ rustflags = ["-C", "target-cpu=x86-64-v2", "-Z", "tune-cpu=generic"] [target.aarch64-apple-darwin] linker = "clang" -rustflags = ["-C", "target-cpu=apple-m1"] +rustflags = ["-C", "target-cpu=apple-m1", "-Z", "tune-cpu=generic"] diff --git 
a/include/pyo3/pyo3-ffi/src/cpython/longobject.rs b/include/pyo3/pyo3-ffi/src/cpython/longobject.rs index 45acaae5..6063575e 100644 --- a/include/pyo3/pyo3-ffi/src/cpython/longobject.rs +++ b/include/pyo3/pyo3-ffi/src/cpython/longobject.rs @@ -61,6 +61,18 @@ extern "C" { is_signed: c_int, ) -> *mut PyObject; + #[cfg(Py_3_13)] + #[cfg_attr(PyPy, link_name = "_PyPyLong_AsByteArrayO")] + pub fn _PyLong_AsByteArray( + v: *mut PyLongObject, + bytes: *mut c_uchar, + n: size_t, + little_endian: c_int, + is_signed: c_int, + with_exceptions: c_int, + ) -> c_int; + + #[cfg(not(Py_3_13))] #[cfg_attr(PyPy, link_name = "_PyPyLong_AsByteArrayO")] pub fn _PyLong_AsByteArray( v: *mut PyLongObject, diff --git a/include/pyo3/pyo3-ffi/src/cpython/tupleobject.rs b/include/pyo3/pyo3-ffi/src/cpython/tupleobject.rs index 9616d437..c06609fb 100644 --- a/include/pyo3/pyo3-ffi/src/cpython/tupleobject.rs +++ b/include/pyo3/pyo3-ffi/src/cpython/tupleobject.rs @@ -5,6 +5,8 @@ use crate::pyport::Py_ssize_t; #[repr(C)] pub struct PyTupleObject { pub ob_base: PyVarObject, + #[cfg(Py_3_14)] + pub ob_hash: crate::Py_hash_t, pub ob_item: [*mut PyObject; 1], } diff --git a/include/pyo3/pyo3-ffi/src/cpython/unicodeobject.rs b/include/pyo3/pyo3-ffi/src/cpython/unicodeobject.rs index fae626b8..72da46c1 100644 --- a/include/pyo3/pyo3-ffi/src/cpython/unicodeobject.rs +++ b/include/pyo3/pyo3-ffi/src/cpython/unicodeobject.rs @@ -119,9 +119,12 @@ where #[cfg(not(GraalPy))] const STATE_INTERNED_INDEX: usize = 0; -#[cfg(not(GraalPy))] +#[cfg(all(not(GraalPy), not(Py_3_14)))] const STATE_INTERNED_WIDTH: u8 = 2; +#[cfg(all(not(GraalPy), Py_3_14))] +const STATE_INTERNED_WIDTH: u8 = 16; + #[cfg(not(GraalPy))] const STATE_KIND_INDEX: usize = STATE_INTERNED_WIDTH as usize; #[cfg(not(GraalPy))] diff --git a/include/yyjson/yyjson.c b/include/yyjson/yyjson.c index 803c2f56..1031182d 100644 --- a/include/yyjson/yyjson.c +++ b/include/yyjson/yyjson.c @@ -5265,7 +5265,7 @@ static_inline bool read_string(u8 **ptr, u8 *cur = *ptr; u8 **end = ptr; - u8 *src = ++cur, *dst, *pos; + u8 *src = ++cur, *dst; u16 hi, lo; u32 uni, tmp; @@ -5332,7 +5332,6 @@ static_inline bool read_string(u8 **ptr, consecutively. We process the byte sequences of the same length in each loop, which is more friendly to branch prediction. 
*/ - pos = src; #if YYJSON_DISABLE_UTF8_VALIDATION while (true) repeat8({ if (likely((*src & 0xF0) == 0xE0)) src += 3; @@ -5550,7 +5549,6 @@ static_inline bool read_string(u8 **ptr, copy_utf8: if (*src & 0x80) { /* non-ASCII character */ - pos = src; uni = byte_load_4(src); #if YYJSON_DISABLE_UTF8_VALIDATION while (true) repeat4({ diff --git a/integration/requirements.txt b/integration/requirements.txt index 2f47d3a7..71e82086 100644 --- a/integration/requirements.txt +++ b/integration/requirements.txt @@ -1,3 +1,3 @@ flask;sys_platform!="win" gunicorn;sys_platform!="win" -httpx==0.27.2;sys_platform!="win" +httpx==0.28.1;sys_platform!="win" diff --git a/pyproject.toml b/pyproject.toml index e9790e35..701765cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,8 +1,8 @@ [project] name = "orjson" -version = "3.10.15" +version = "3.10.18" repository = "https://github.com/ijl/orjson" -requires-python = ">=3.8" +requires-python = ">=3.9" classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", @@ -12,7 +12,6 @@ classifiers = [ "Operating System :: Microsoft :: Windows", "Operating System :: POSIX :: Linux", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", @@ -47,7 +46,7 @@ include = [ [tool.ruff] line-length = 88 -target-version = "py38" +target-version = "py39" [tool.ruff.lint] select = [ @@ -62,7 +61,7 @@ ignore = [ known-first-party = ["orjson"] [tool.mypy] -python_version = "3.8" +python_version = "3.9" [[tool.mypy.overrides]] module = ["dateutil", "pytz"] diff --git a/requirements.txt b/requirements.txt index f898acc2..1e477a76 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,5 +2,5 @@ -r integration/requirements.txt -r test/requirements.txt maturin>=1,<2 -mypy==1.13.0 -ruff>=0.8,<9 +mypy==1.15.0 +ruff>=0.11,<0.12 diff --git a/script/cargo b/script/cargo new file mode 100755 index 00000000..3e20029b --- /dev/null +++ b/script/cargo @@ -0,0 +1,8 @@ +#!/usr/bin/env bash + +set -eou pipefail + +export UNSAFE_PYO3_BUILD_FREE_THREADED=1 +export UNSAFE_PYO3_SKIP_VERSION_CHECK=1 + +RUSTFLAGS="-C panic=abort -Z panic_abort_tests" cargo "$@" --target="${TARGET:-x86_64-unknown-linux-gnu}" diff --git a/script/check-pypi b/script/check-pypi new file mode 100755 index 00000000..54f60216 --- /dev/null +++ b/script/check-pypi @@ -0,0 +1,86 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import sys +from pathlib import Path + +import tomllib + +dist = sys.argv[1] + +pyproject_doc = tomllib.loads(Path("pyproject.toml").read_text(encoding="utf-8")) +pyproject_version = pyproject_doc["project"]["version"] + +prefix = f"orjson-{pyproject_version}" + +abis = ( + "cp39-cp39", + "cp310-cp310", + "cp311-cp311", + "cp312-cp312", + "cp313-cp313", +) + +per_abi_tags = ( + "macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2", + "manylinux_2_17_aarch64.manylinux2014_aarch64", + "manylinux_2_17_armv7l.manylinux2014_armv7l", + "manylinux_2_17_ppc64le.manylinux2014_ppc64le", + "manylinux_2_17_s390x.manylinux2014_s390x", + "manylinux_2_17_x86_64.manylinux2014_x86_64", + "manylinux_2_17_i686.manylinux2014_i686", + "musllinux_1_2_aarch64", + "musllinux_1_2_armv7l", + "musllinux_1_2_i686", + "musllinux_1_2_x86_64", + "win32", + "win_amd64", +) + +wheels_matrix = set() + +# orjson-3.10.15-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl +for abi in 
abis: + for tag in per_abi_tags: + wheels_matrix.add(f"{prefix}-{abi}-{tag}.whl") + +wheels_unique = { + f"{prefix}-cp311-cp311-macosx_15_0_arm64.whl", + f"{prefix}-cp311-cp311-win_arm64.whl", + f"{prefix}-cp312-cp312-macosx_15_0_arm64.whl", + f"{prefix}-cp312-cp312-win_arm64.whl", + f"{prefix}-cp313-cp313-macosx_15_0_arm64.whl", + f"{prefix}-cp313-cp313-win_arm64.whl", +} + +wheels_expected = wheels_matrix | wheels_unique + +wheels_queued = set( + (str(each).replace(f"{dist}/", "") for each in Path(dist).glob("*.whl")) +) + +exit_code = 0 + +# sdist +sdist_path = Path(f"{dist}/{prefix}.tar.gz") +if sdist_path.exists(): + print(f"sdist present\n") +else: + exit_code = 1 + print(f"Missing sdist:\n{sdist_path}\n") + +# whl +if wheels_expected == wheels_queued: + print(f"Wheels as expected, {len(wheels_queued)} total\n") +else: + exit_code = 1 + + missing = "\n".join(sorted(wheels_expected - wheels_queued)) + if missing: + print(f"Missing wheels:\n{missing}\n") + + additional = "\n".join(sorted(wheels_queued - wheels_expected)) + if additional: + print(f"Unexpected wheels:\n{additional}\n") + +sys.exit(exit_code) diff --git a/script/debug b/script/debug new file mode 100755 index 00000000..c5d8f4d5 --- /dev/null +++ b/script/debug @@ -0,0 +1,28 @@ +#!/usr/bin/env bash + +set -eou pipefail + +rm -rf .cargo +rm -f ${CARGO_TARGET_DIR}/wheels/*.whl + +export UNSAFE_PYO3_BUILD_FREE_THREADED=1 +export UNSAFE_PYO3_SKIP_VERSION_CHECK=1 + +export CC="${CC:-clang}" +export LD="${LD:-lld}" +export TARGET="${TARGET:-x86_64-unknown-linux-gnu}" +export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}" +export ORJSON_FEATURES="${ORJSON_FEATURES:-yyjson}" + +export CFLAGS="-Os -fstrict-aliasing" + +export RUSTFLAGS="-C panic=unwind -C linker=${CC} -C link-arg=-fuse-ld=${LD}" + +maturin build --profile=dev --target=${TARGET} --features="${ORJSON_FEATURES}" --interpreter "${PYTHON}" + +uv pip install ${CARGO_TARGET_DIR}/wheels/*.whl + +pytest -v test + +mkdir .cargo +cp ci/config.toml .cargo diff --git a/script/develop b/script/develop index b92a22e9..fa1c66b8 100755 --- a/script/develop +++ b/script/develop @@ -12,7 +12,7 @@ export CARGO_TARGET_DIR="${CARGO_TARGET_DIR:-target}" echo "CC: ${CC}, LD: ${LD}, LD_LIBRARY_PATH: ${LD_LIBRARY_PATH}" -export CFLAGS="-Os -fstrict-aliasing -fno-plt -flto=full -emit-llvm" +export CFLAGS="-O2 -fstrict-aliasing -fno-plt -emit-llvm" export LDFLAGS="-fuse-ld=${LD} -Wl,-plugin-opt=also-emit-llvm -Wl,--as-needed -Wl,-zrelro,-znow" export RUSTFLAGS="-C linker=${CC} -C link-arg=-fuse-ld=${LD} -C linker-plugin-lto -C lto=fat -C link-arg=-Wl,-zrelro,-znow -Z mir-opt-level=4 -Z threads=8" diff --git a/script/lint b/script/lint index e8e03b28..4def3a3f 100755 --- a/script/lint +++ b/script/lint @@ -4,7 +4,8 @@ set -eou pipefail to_lint="./bench/*.py ./pysrc/orjson/__init__.pyi ./test/*.py script/pydataclass script/pysort script/pynumpy script/pynonstr script/pycorrectness script/graph integration/init -integration/wsgi.py integration/typestubs.py integration/thread script/check-version" +integration/wsgi.py integration/typestubs.py integration/thread script/check-version +script/check-pypi" ruff check ${to_lint} --fix ruff format ${to_lint} diff --git a/src/alloc.rs b/src/alloc.rs index 4c54938d..71f29998 100644 --- a/src/alloc.rs +++ b/src/alloc.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use std::alloc::{GlobalAlloc, Layout}; -use std::ffi::c_void; +use core::alloc::{GlobalAlloc, Layout}; +use core::ffi::c_void; struct PyMemAllocator {} @@ -13,19 +13,19 @@ 
unsafe impl Sync for PyMemAllocator {} unsafe impl GlobalAlloc for PyMemAllocator { #[inline] unsafe fn alloc(&self, layout: Layout) -> *mut u8 { - unsafe { pyo3_ffi::PyMem_Malloc(layout.size()) as *mut u8 } + unsafe { pyo3_ffi::PyMem_Malloc(layout.size()).cast::() } } #[inline] unsafe fn dealloc(&self, ptr: *mut u8, _layout: Layout) { - unsafe { pyo3_ffi::PyMem_Free(ptr as *mut c_void) } + unsafe { pyo3_ffi::PyMem_Free(ptr.cast::()) } } #[inline] unsafe fn alloc_zeroed(&self, layout: Layout) -> *mut u8 { unsafe { let len = layout.size(); - let ptr = pyo3_ffi::PyMem_Malloc(len) as *mut u8; + let ptr = pyo3_ffi::PyMem_Malloc(len).cast::(); core::ptr::write_bytes(ptr, 0, len); ptr } @@ -33,6 +33,6 @@ unsafe impl GlobalAlloc for PyMemAllocator { #[inline] unsafe fn realloc(&self, ptr: *mut u8, _layout: Layout, new_size: usize) -> *mut u8 { - unsafe { pyo3_ffi::PyMem_Realloc(ptr as *mut c_void, new_size) as *mut u8 } + unsafe { pyo3_ffi::PyMem_Realloc(ptr.cast::(), new_size).cast::() } } } diff --git a/src/deserialize/backend/json.rs b/src/deserialize/backend/json.rs index 4a962b0e..5cb6bb37 100644 --- a/src/deserialize/backend/json.rs +++ b/src/deserialize/backend/json.rs @@ -128,16 +128,7 @@ impl<'de> Visitor<'de> for JsonValue { while let Some(key) = map.next_key::>()? { let pykey = get_unicode_key(&key); let pyval = map.next_value_seed(self)?; - let _ = unsafe { - pyo3_ffi::_PyDict_SetItem_KnownHash( - dict_ptr, - pykey, - pyval.as_ptr(), - str_hash!(pykey), - ) - }; - reverse_pydict_incref!(pykey); - reverse_pydict_incref!(pyval.as_ptr()); + pydict_setitem!(dict_ptr, pykey, pyval.as_ptr()); } Ok(nonnull!(dict_ptr)) } diff --git a/src/deserialize/backend/yyjson.rs b/src/deserialize/backend/yyjson.rs index 9840e183..24d247d7 100644 --- a/src/deserialize/backend/yyjson.rs +++ b/src/deserialize/backend/yyjson.rs @@ -1,10 +1,16 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::deserialize::pyobject::*; +use crate::deserialize::pyobject::{ + get_unicode_key, parse_f64, parse_false, parse_i64, parse_none, parse_true, parse_u64, +}; use crate::deserialize::DeserializeError; -use crate::ffi::yyjson::*; +use crate::ffi::yyjson::{ + yyjson_doc, yyjson_doc_free, yyjson_read_err, yyjson_read_opts, yyjson_val, YYJSON_READ_SUCCESS, +}; use crate::str::unicode_from_str; use crate::typeref::{yyjson_init, YYJSON_ALLOC, YYJSON_BUFFER_SIZE}; +use crate::util::usize_to_isize; + use core::ffi::c_char; use core::ptr::{null, null_mut, NonNull}; use std::borrow::Cow; @@ -49,12 +55,14 @@ fn unsafe_yyjson_is_ctn(val: *mut yyjson_val) -> bool { unsafe { (*val).tag as u8 & 0b00000110 == 0b00000110 } } +#[allow(clippy::cast_ptr_alignment)] fn unsafe_yyjson_get_next_container(val: *mut yyjson_val) -> *mut yyjson_val { - unsafe { ((val as *mut u8).add((*val).uni.ofs)) as *mut yyjson_val } + unsafe { (val.cast::().add((*val).uni.ofs)).cast::() } } +#[allow(clippy::cast_ptr_alignment)] fn unsafe_yyjson_get_next_non_container(val: *mut yyjson_val) -> *mut yyjson_val { - unsafe { ((val as *mut u8).add(YYJSON_VAL_SIZE)) as *mut yyjson_val } + unsafe { (val.cast::().add(YYJSON_VAL_SIZE)).cast::() } } pub(crate) fn deserialize( @@ -91,16 +99,16 @@ pub(crate) fn deserialize( unsafe { yyjson_doc_free(doc) }; Ok(pyval) } else if is_yyjson_tag!(val, TAG_ARRAY) { - let pyval = nonnull!(ffi!(PyList_New(unsafe_yyjson_get_len(val) as isize))); + let pyval = nonnull!(ffi!(PyList_New(usize_to_isize(unsafe_yyjson_get_len(val))))); if unsafe_yyjson_get_len(val) > 0 { populate_yy_array(pyval.as_ptr(), val); } unsafe { 
yyjson_doc_free(doc) }; Ok(pyval) } else { - let pyval = nonnull!(ffi!(_PyDict_NewPresized( - unsafe_yyjson_get_len(val) as isize - ))); + let pyval = nonnull!(ffi!(_PyDict_NewPresized(usize_to_isize( + unsafe_yyjson_get_len(val) + )))); if unsafe_yyjson_get_len(val) > 0 { populate_yy_object(pyval.as_ptr(), val); } @@ -157,7 +165,7 @@ impl ElementType { #[inline(always)] fn parse_yy_string(elem: *mut yyjson_val) -> NonNull { nonnull!(unicode_from_str(str_from_slice!( - (*elem).uni.str_ as *const u8, + (*elem).uni.str_.cast::(), unsafe_yyjson_get_len(elem) ))) } @@ -192,20 +200,22 @@ fn populate_yy_array(list: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { let len = unsafe_yyjson_get_len(elem); assume!(len >= 1); let mut next = unsafe_yyjson_get_first(elem); - let mut dptr = (*(list as *mut pyo3_ffi::PyListObject)).ob_item; + let mut dptr = (*list.cast::()).ob_item; for _ in 0..len { let val = next; if unlikely!(unsafe_yyjson_is_ctn(val)) { next = unsafe_yyjson_get_next_container(val); if is_yyjson_tag!(val, TAG_ARRAY) { - let pyval = ffi!(PyList_New(unsafe_yyjson_get_len(val) as isize)); + let pyval = ffi!(PyList_New(usize_to_isize(unsafe_yyjson_get_len(val)))); append_to_list!(dptr, pyval); if unsafe_yyjson_get_len(val) > 0 { populate_yy_array(pyval, val); } } else { - let pyval = ffi!(_PyDict_NewPresized(unsafe_yyjson_get_len(val) as isize)); + let pyval = ffi!(_PyDict_NewPresized(usize_to_isize(unsafe_yyjson_get_len( + val + )))); append_to_list!(dptr, pyval); if unsafe_yyjson_get_len(val) > 0 { populate_yy_object(pyval, val); @@ -230,12 +240,6 @@ fn populate_yy_array(list: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { } } -macro_rules! add_to_dict { - ($dict:expr, $pykey:expr, $pyval:expr) => { - unsafe { pyo3_ffi::_PyDict_SetItem_KnownHash($dict, $pykey, $pyval, str_hash!($pykey)) } - }; -} - #[inline(never)] fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { unsafe { @@ -247,7 +251,7 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { let val = next_val; let pykey = { let key_str = str_from_slice!( - (*next_key).uni.str_ as *const u8, + (*next_key).uni.str_.cast::(), unsafe_yyjson_get_len(next_key) ); get_unicode_key(key_str) @@ -256,18 +260,16 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { next_key = unsafe_yyjson_get_next_container(val); next_val = next_key.add(1); if is_yyjson_tag!(val, TAG_ARRAY) { - let pyval = ffi!(PyList_New(unsafe_yyjson_get_len(val) as isize)); - add_to_dict!(dict, pykey, pyval); - reverse_pydict_incref!(pykey); - reverse_pydict_incref!(pyval); + let pyval = ffi!(PyList_New(usize_to_isize(unsafe_yyjson_get_len(val)))); + pydict_setitem!(dict, pykey, pyval); if unsafe_yyjson_get_len(val) > 0 { populate_yy_array(pyval, val); } } else { - let pyval = ffi!(_PyDict_NewPresized(unsafe_yyjson_get_len(val) as isize)); - add_to_dict!(dict, pykey, pyval); - reverse_pydict_incref!(pykey); - reverse_pydict_incref!(pyval); + let pyval = ffi!(_PyDict_NewPresized(usize_to_isize(unsafe_yyjson_get_len( + val + )))); + pydict_setitem!(dict, pykey, pyval); if unsafe_yyjson_get_len(val) > 0 { populate_yy_object(pyval, val); } @@ -286,9 +288,7 @@ fn populate_yy_object(dict: *mut pyo3_ffi::PyObject, elem: *mut yyjson_val) { ElementType::Array => unreachable_unchecked!(), ElementType::Object => unreachable_unchecked!(), }; - add_to_dict!(dict, pykey, pyval.as_ptr()); - reverse_pydict_incref!(pykey); - reverse_pydict_incref!(pyval.as_ptr()); + pydict_setitem!(dict, pykey, 
pyval.as_ptr()); } } } diff --git a/src/deserialize/cache.rs b/src/deserialize/cache.rs index f5357893..20848a3b 100644 --- a/src/deserialize/cache.rs +++ b/src/deserialize/cache.rs @@ -15,11 +15,11 @@ unsafe impl Sync for CachedKey {} impl CachedKey { pub fn new(ptr: *mut pyo3_ffi::PyObject) -> CachedKey { CachedKey { - ptr: ptr as *mut c_void, + ptr: ptr.cast::(), } } pub fn get(&mut self) -> *mut pyo3_ffi::PyObject { - let ptr = self.ptr as *mut pyo3_ffi::PyObject; + let ptr = self.ptr.cast::(); debug_assert!(ffi!(Py_REFCNT(ptr)) >= 1); ffi!(Py_INCREF(ptr)); ptr @@ -28,7 +28,7 @@ impl CachedKey { impl Drop for CachedKey { fn drop(&mut self) { - ffi!(Py_DECREF(self.ptr as *mut pyo3_ffi::PyObject)); + ffi!(Py_DECREF(self.ptr.cast::())); } } @@ -36,9 +36,3 @@ pub type KeyMap = AssociativeCache; pub static mut KEY_MAP: OnceCell = OnceCell::new(); - -#[inline(always)] -pub fn cache_hash(key: &[u8]) -> u64 { - assume!(key.len() <= 64); - xxhash_rust::xxh3::xxh3_64(key) -} diff --git a/src/deserialize/deserializer.rs b/src/deserialize/deserializer.rs index b5a94f3a..3a269649 100644 --- a/src/deserialize/deserializer.rs +++ b/src/deserialize/deserializer.rs @@ -21,7 +21,7 @@ pub fn deserialize( } } - let buffer_str = unsafe { std::str::from_utf8_unchecked(buffer) }; + let buffer_str = unsafe { core::str::from_utf8_unchecked(buffer) }; crate::deserialize::backend::deserialize(buffer_str) } diff --git a/src/deserialize/pyobject.rs b/src/deserialize/pyobject.rs index e486c916..2809b662 100644 --- a/src/deserialize/pyobject.rs +++ b/src/deserialize/pyobject.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::deserialize::cache::*; +use crate::deserialize::cache::{CachedKey, KEY_MAP}; use crate::str::{hash_str, unicode_from_str}; use crate::typeref::{FALSE, NONE, TRUE}; use core::ptr::NonNull; @@ -12,7 +12,8 @@ pub fn get_unicode_key(key_str: &str) -> *mut pyo3_ffi::PyObject { hash_str(pyob); pyob } else { - let hash = cache_hash(key_str.as_bytes()); + assume!(key_str.len() <= 64); + let hash = xxhash_rust::xxh3::xxh3_64(key_str.as_bytes()); unsafe { let entry = KEY_MAP .get_mut() diff --git a/src/deserialize/utf8.rs b/src/deserialize/utf8.rs index 2d2842c6..9e993330 100644 --- a/src/deserialize/utf8.rs +++ b/src/deserialize/utf8.rs @@ -1,9 +1,10 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::deserialize::DeserializeError; -use crate::ffi::*; +use crate::ffi::{PyBytes_AS_STRING, PyBytes_GET_SIZE, PyMemoryView_GET_BUFFER}; use crate::str::unicode_to_str; use crate::typeref::{BYTEARRAY_TYPE, BYTES_TYPE, MEMORYVIEW_TYPE, STR_TYPE}; +use crate::util::isize_to_usize; use crate::util::INVALID_STR; use core::ffi::c_char; use std::borrow::Cow; @@ -24,7 +25,7 @@ fn is_valid_utf8(buf: &[u8]) -> bool { #[cfg(target_arch = "aarch64")] fn is_valid_utf8(buf: &[u8]) -> bool { - simdutf8::basic::from_utf8(buf).is_ok() + unsafe { simdutf8::basic::imp::aarch64::neon::validate_utf8(buf).is_ok() } } #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))] @@ -40,8 +41,8 @@ pub fn read_input_to_buf( if is_type!(obj_type_ptr, BYTES_TYPE) { buffer = unsafe { core::slice::from_raw_parts( - PyBytes_AS_STRING(ptr) as *const u8, - PyBytes_GET_SIZE(ptr) as usize, + PyBytes_AS_STRING(ptr).cast::(), + isize_to_usize(PyBytes_GET_SIZE(ptr)), ) }; if !is_valid_utf8(buffer) { @@ -62,7 +63,7 @@ pub fn read_input_to_buf( ))); } buffer = unsafe { - core::slice::from_raw_parts((*membuf).buf as *const u8, (*membuf).len as usize) + core::slice::from_raw_parts((*membuf).buf as *const u8, 
isize_to_usize((*membuf).len)) }; if !is_valid_utf8(buffer) { return Err(DeserializeError::invalid(Cow::Borrowed(INVALID_STR))); } @@ -71,7 +72,7 @@ buffer = unsafe { core::slice::from_raw_parts( ffi!(PyByteArray_AsString(ptr)) as *const u8, - ffi!(PyByteArray_Size(ptr)) as usize, + isize_to_usize(ffi!(PyByteArray_Size(ptr))), ) }; if !is_valid_utf8(buffer) { diff --git a/src/ffi/buffer.rs b/src/ffi/buffer.rs index cd05081a..a4ae4016 100644 --- a/src/ffi/buffer.rs +++ b/src/ffi/buffer.rs @@ -1,7 +1,7 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use core::ffi::c_int; -use pyo3_ffi::*; +use pyo3_ffi::{PyObject, PyVarObject, Py_buffer, Py_hash_t, Py_ssize_t}; #[repr(C)] pub struct _PyManagedBufferObject { diff --git a/src/ffi/fragment.rs b/src/ffi/fragment.rs index e890fc35..9a7fe098 100644 --- a/src/ffi/fragment.rs +++ b/src/ffi/fragment.rs @@ -6,7 +6,11 @@ use core::ffi::c_char; use std::sync::atomic::{AtomicIsize, AtomicU32, AtomicU64}; use core::ptr::null_mut; -use pyo3_ffi::*; +use pyo3_ffi::{ + PyErr_SetObject, PyExc_TypeError, PyObject, PyTuple_GET_ITEM, PyTypeObject, PyType_Ready, + PyType_Type, PyUnicode_FromStringAndSize, PyVarObject, Py_DECREF, Py_INCREF, Py_SIZE, + Py_TPFLAGS_DEFAULT, +}; // https://docs.python.org/3/c-api/typeobj.html#typedef-examples @@ -21,7 +25,7 @@ pub struct Fragment { #[cfg(Py_GIL_DISABLED)] pub _padding: u16, #[cfg(Py_GIL_DISABLED)] - pub ob_mutex: PyMutex, + pub ob_mutex: pyo3_ffi::PyMutex, #[cfg(Py_GIL_DISABLED)] pub ob_gc_bits: u8, #[cfg(Py_GIL_DISABLED)] @@ -41,7 +45,7 @@ fn raise_args_exception() { unsafe { let msg = "orjson.Fragment() takes exactly 1 positional argument"; let err_msg = - PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); + PyUnicode_FromStringAndSize(msg.as_ptr().cast::<c_char>(), msg.len() as isize); PyErr_SetObject(PyExc_TypeError, err_msg); Py_DECREF(err_msg); }; @@ -68,7 +72,7 @@ pub unsafe extern "C" fn orjson_fragment_tp_new( #[cfg(Py_GIL_DISABLED)] _padding: 0, #[cfg(Py_GIL_DISABLED)] - ob_mutex: PyMutex::new(), + ob_mutex: pyo3_ffi::PyMutex::new(), #[cfg(Py_GIL_DISABLED)] ob_gc_bits: 0, #[cfg(Py_GIL_DISABLED)] @@ -80,7 +84,7 @@ pub unsafe extern "C" fn orjson_fragment_tp_new( ob_type: crate::typeref::FRAGMENT_TYPE, contents: contents, }); - Box::into_raw(obj) as *mut PyObject + Box::into_raw(obj).cast::<PyObject>() } } } @@ -90,16 +94,18 @@ pub unsafe extern "C" fn orjson_fragment_tp_new( #[cfg_attr(feature = "optimize", optimize(size))] pub unsafe extern "C" fn orjson_fragment_dealloc(object: *mut PyObject) { unsafe { - Py_DECREF((*(object as *mut Fragment)).contents); - std::alloc::dealloc(object as *mut u8, std::alloc::Layout::new::<Fragment>()); + Py_DECREF((*object.cast::<Fragment>()).contents); + std::alloc::dealloc(object.cast::<u8>(), core::alloc::Layout::new::<Fragment>()); } } #[cfg(Py_GIL_DISABLED)] -const FRAGMENT_TP_FLAGS: AtomicU64 = AtomicU64::new(Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE); +const FRAGMENT_TP_FLAGS: AtomicU64 = + AtomicU64::new(Py_TPFLAGS_DEFAULT | pyo3_ffi::Py_TPFLAGS_IMMUTABLETYPE); #[cfg(all(Py_3_10, not(Py_GIL_DISABLED)))] -const FRAGMENT_TP_FLAGS: core::ffi::c_ulong = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_IMMUTABLETYPE; +const FRAGMENT_TP_FLAGS: core::ffi::c_ulong = + Py_TPFLAGS_DEFAULT | pyo3_ffi::Py_TPFLAGS_IMMUTABLETYPE; #[cfg(not(Py_3_10))] const FRAGMENT_TP_FLAGS: core::ffi::c_ulong = Py_TPFLAGS_DEFAULT; @@ -117,7 +123,7 @@ pub unsafe extern "C" fn orjson_fragmenttype_new() -> *mut PyTypeObject { #[cfg(Py_GIL_DISABLED)] _padding: 0, #[cfg(Py_GIL_DISABLED)] - ob_mutex: 
PyMutex::new(), + ob_mutex: pyo3_ffi::PyMutex::new(), #[cfg(Py_GIL_DISABLED)] ob_gc_bits: 0, #[cfg(Py_GIL_DISABLED)] @@ -128,11 +134,11 @@ pub unsafe extern "C" fn orjson_fragmenttype_new() -> *mut PyTypeObject { ob_refcnt: pyo3_ffi::PyObjectObRefcnt { ob_refcnt: 0 }, #[cfg(not(Py_3_12))] ob_refcnt: 0, - ob_type: core::ptr::addr_of_mut!(PyType_Type), + ob_type: &raw mut PyType_Type, }, ob_size: 0, }, - tp_name: "orjson.Fragment\0".as_ptr() as *const c_char, + tp_name: c"orjson.Fragment".as_ptr(), tp_basicsize: core::mem::size_of::<Fragment>() as isize, tp_itemsize: 0, tp_dealloc: Some(orjson_fragment_dealloc), @@ -151,8 +157,6 @@ pub unsafe extern "C" fn orjson_fragmenttype_new() -> *mut PyTypeObject { tp_vectorcall: None, tp_version_tag: 0, tp_weaklist: null_mut(), - #[cfg(not(Py_3_9))] - tp_print: None, tp_vectorcall_offset: 0, tp_getattr: None, tp_setattr: None, diff --git a/src/ffi/long.rs b/src/ffi/long.rs index 7aa49a67..438143be 100644 --- a/src/ffi/long.rs +++ b/src/ffi/long.rs @@ -40,37 +40,37 @@ pub struct PyLongObject { #[cfg(Py_3_12)] #[inline(always)] pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == 0 } + unsafe { (*ptr.cast::<PyLongObject>()).long_value.lv_tag & SIGN_MASK == 0 } } #[cfg(not(Py_3_12))] #[inline(always)] pub fn pylong_is_unsigned(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size > 0 } + unsafe { (*ptr.cast::<pyo3_ffi::PyVarObject>()).ob_size > 0 } } #[cfg(all(Py_3_12, feature = "inline_int"))] #[inline(always)] pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag < (2 << NON_SIZE_BITS) } + unsafe { (*ptr.cast::<PyLongObject>()).long_value.lv_tag < (2 << NON_SIZE_BITS) } } #[cfg(all(not(Py_3_12), feature = "inline_int"))] #[inline(always)] pub fn pylong_fits_in_i32(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { isize::abs((*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size) == 1 } + unsafe { isize::abs((*ptr.cast::<pyo3_ffi::PyVarObject>()).ob_size) == 1 } } #[cfg(all(Py_3_12, feature = "inline_int"))] #[inline(always)] pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut PyLongObject)).long_value.lv_tag & SIGN_MASK == SIGN_ZERO } + unsafe { (*ptr.cast::<PyLongObject>()).long_value.lv_tag & SIGN_MASK == SIGN_ZERO } } #[cfg(all(not(Py_3_12), feature = "inline_int"))] #[inline(always)] pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { - unsafe { (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size == 0 } + unsafe { (*ptr.cast::<pyo3_ffi::PyVarObject>()).ob_size == 0 } } #[cfg(all(Py_3_12, feature = "inline_int"))] @@ -78,9 +78,9 @@ pub fn pylong_is_zero(ptr: *mut pyo3_ffi::PyObject) -> bool { pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 { unsafe { if pylong_is_unsigned(ptr) { - (*(ptr as *mut PyLongObject)).long_value.ob_digit as i64 + i64::from((*ptr.cast::<PyLongObject>()).long_value.ob_digit) } else { - -((*(ptr as *mut PyLongObject)).long_value.ob_digit as i64) + -i64::from((*ptr.cast::<PyLongObject>()).long_value.ob_digit) } } } @@ -89,7 +89,7 @@ pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 { #[inline(always)] pub fn pylong_get_inline_value(ptr: *mut pyo3_ffi::PyObject) -> i64 { unsafe { - (*(ptr as *mut pyo3_ffi::PyVarObject)).ob_size as i64 - * (*(ptr as *mut PyLongObject)).ob_digit as i64 + (*ptr.cast::<pyo3_ffi::PyVarObject>()).ob_size as i64 + * i64::from((*ptr.cast::<PyLongObject>()).ob_digit) } } diff --git a/src/lib.rs b/src/lib.rs index c738e506..21f72d2c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -3,18 +3,57 @@ #![cfg_attr(feature = 
"avx512", feature(stdarch_x86_avx512, avx512_target_feature))] #![cfg_attr(feature = "intrinsics", feature(core_intrinsics))] #![cfg_attr(feature = "optimize", feature(optimize_attribute))] -#![cfg_attr(feature = "unstable-simd", feature(portable_simd))] -#![allow(internal_features)] // core_intrinsics -#![allow(non_camel_case_types)] -#![allow(static_mut_refs)] -#![allow(unknown_lints)] // internal_features -#![allow(unused_unsafe)] +#![cfg_attr(feature = "generic_simd", feature(portable_simd))] +#![allow(clippy::absolute_paths)] +#![allow(clippy::allow_attributes)] +#![allow(clippy::allow_attributes_without_reason)] +#![allow(clippy::arbitrary_source_item_ordering)] +#![allow(clippy::decimal_literal_representation)] +#![allow(clippy::doc_markdown)] +#![allow(clippy::explicit_iter_loop)] +#![allow(clippy::if_not_else)] +#![allow(clippy::implicit_return)] +#![allow(clippy::inline_always)] +#![allow(clippy::let_underscore_untyped)] +#![allow(clippy::missing_assert_message)] +#![allow(clippy::missing_docs_in_private_items)] +#![allow(clippy::missing_inline_in_public_items)] +#![allow(clippy::missing_panics_doc)] #![allow(clippy::missing_safety_doc)] +#![allow(clippy::multiple_unsafe_ops_per_block)] #![allow(clippy::needless_lifetimes)] +#![allow(clippy::question_mark_used)] +#![allow(clippy::redundant_else)] #![allow(clippy::redundant_field_names)] -#![allow(clippy::uninlined_format_args)] // MSRV 1.66 +#![allow(clippy::renamed_function_params)] +#![allow(clippy::semicolon_outside_block)] +#![allow(clippy::single_call_fn)] +#![allow(clippy::undocumented_unsafe_blocks)] +#![allow(clippy::unreachable)] +#![allow(clippy::unreadable_literal)] +#![allow(clippy::unusual_byte_groupings)] +#![allow(clippy::unwrap_in_result)] #![allow(clippy::upper_case_acronyms)] #![allow(clippy::zero_prefixed_literal)] +#![allow(internal_features)] // core_intrinsics +#![allow(non_camel_case_types)] +#![allow(static_mut_refs)] +#![allow(unknown_lints)] // internal_features +#![allow(unused_unsafe)] +#![warn(clippy::ptr_arg)] +#![warn(clippy::ptr_as_ptr)] +#![warn(clippy::ptr_cast_constness)] +#![warn(clippy::ptr_eq)] +#![warn(clippy::redundant_allocation)] +#![warn(clippy::redundant_clone)] +#![warn(clippy::redundant_locals)] +#![warn(clippy::redundant_slicing)] +#![warn(clippy::size_of_ref)] +#![warn(clippy::std_instead_of_core)] +#![warn(clippy::trivially_copy_pass_by_ref)] +#![warn(clippy::unnecessary_semicolon)] +#![warn(clippy::unnecessary_wraps)] +#![warn(clippy::zero_ptr)] #[cfg(feature = "unwind")] extern crate unwinding; @@ -31,7 +70,15 @@ mod str; mod typeref; use core::ffi::{c_char, c_int, c_void}; -use pyo3_ffi::*; +use pyo3_ffi::{ + PyCFunction_NewEx, PyErr_SetObject, PyLong_AsLong, PyLong_FromLongLong, PyMethodDef, + PyMethodDefPointer, PyModuleDef, PyModuleDef_HEAD_INIT, PyModuleDef_Slot, PyObject, + PyTuple_GET_ITEM, PyTuple_New, PyTuple_SET_ITEM, PyUnicode_FromStringAndSize, + PyUnicode_InternFromString, PyVectorcall_NARGS, Py_DECREF, Py_SIZE, Py_ssize_t, METH_KEYWORDS, + METH_O, +}; + +use crate::util::{isize_to_usize, usize_to_isize}; #[allow(unused_imports)] use core::ptr::{null, null_mut, NonNull}; @@ -39,32 +86,32 @@ use core::ptr::{null, null_mut, NonNull}; #[cfg(Py_3_13)] macro_rules! add { ($mptr:expr, $name:expr, $obj:expr) => { - PyModule_Add($mptr, $name.as_ptr() as *const c_char, $obj); + pyo3_ffi::PyModule_Add($mptr, $name.as_ptr(), $obj); }; } #[cfg(all(Py_3_10, not(Py_3_13)))] macro_rules! 
add { ($mptr:expr, $name:expr, $obj:expr) => { - PyModule_AddObjectRef($mptr, $name.as_ptr() as *const c_char, $obj); + pyo3_ffi::PyModule_AddObjectRef($mptr, $name.as_ptr(), $obj); }; } #[cfg(not(Py_3_10))] macro_rules! add { ($mptr:expr, $name:expr, $obj:expr) => { - PyModule_AddObject($mptr, $name.as_ptr() as *const c_char, $obj); + pyo3_ffi::PyModule_AddObject($mptr, $name.as_ptr(), $obj); }; } macro_rules! opt { ($mptr:expr, $name:expr, $opt:expr) => { #[cfg(all(not(target_os = "windows"), target_pointer_width = "64"))] - PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i64); + pyo3_ffi::PyModule_AddIntConstant($mptr, $name.as_ptr(), i64::from($opt)); #[cfg(all(not(target_os = "windows"), target_pointer_width = "32"))] - PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i32); + pyo3_ffi::PyModule_AddIntConstant($mptr, $name.as_ptr(), $opt as i32); #[cfg(target_os = "windows")] - PyModule_AddIntConstant($mptr, $name.as_ptr() as *const c_char, $opt as i32); + pyo3_ffi::PyModule_AddIntConstant($mptr, $name.as_ptr(), $opt as i32); }; } @@ -75,19 +122,21 @@ macro_rules! opt { pub unsafe extern "C" fn orjson_init_exec(mptr: *mut PyObject) -> c_int { unsafe { typeref::init_typerefs(); + { let version = env!("CARGO_PKG_VERSION"); let pyversion = PyUnicode_FromStringAndSize( - version.as_ptr() as *const c_char, - version.len() as isize, + version.as_ptr().cast::(), + usize_to_isize(version.len()), ); - add!(mptr, "__version__\0", pyversion); + add!(mptr, c"__version__", pyversion); } + { - let dumps_doc = "dumps(obj, /, default=None, option=None)\n--\n\nSerialize Python objects to JSON.\0"; + let dumps_doc = c"dumps(obj, /, default=None, option=None)\n--\n\nSerialize Python objects to JSON."; let wrapped_dumps = PyMethodDef { - ml_name: "dumps\0".as_ptr() as *const c_char, + ml_name: c"dumps".as_ptr(), ml_meth: PyMethodDefPointer { #[cfg(Py_3_10)] PyCFunctionFastWithKeywords: dumps, @@ -95,65 +144,57 @@ pub unsafe extern "C" fn orjson_init_exec(mptr: *mut PyObject) -> c_int { _PyCFunctionFastWithKeywords: dumps, }, ml_flags: pyo3_ffi::METH_FASTCALL | METH_KEYWORDS, - ml_doc: dumps_doc.as_ptr() as *const c_char, + ml_doc: dumps_doc.as_ptr(), }; let func = PyCFunction_NewEx( Box::into_raw(Box::new(wrapped_dumps)), null_mut(), - PyUnicode_InternFromString("orjson\0".as_ptr() as *const c_char), + PyUnicode_InternFromString(c"orjson".as_ptr()), ); - add!(mptr, "dumps\0", func); + add!(mptr, c"dumps", func); } { - let loads_doc = "loads(obj, /)\n--\n\nDeserialize JSON to Python objects.\0"; + let loads_doc = c"loads(obj, /)\n--\n\nDeserialize JSON to Python objects."; let wrapped_loads = PyMethodDef { - ml_name: "loads\0".as_ptr() as *const c_char, + ml_name: c"loads".as_ptr(), ml_meth: PyMethodDefPointer { PyCFunction: loads }, ml_flags: METH_O, - ml_doc: loads_doc.as_ptr() as *const c_char, + ml_doc: loads_doc.as_ptr(), }; let func = PyCFunction_NewEx( Box::into_raw(Box::new(wrapped_loads)), null_mut(), - PyUnicode_InternFromString("orjson\0".as_ptr() as *const c_char), + PyUnicode_InternFromString(c"orjson".as_ptr()), ); - add!(mptr, "loads\0", func); + add!(mptr, c"loads", func); } - add!(mptr, "Fragment\0", typeref::FRAGMENT_TYPE as *mut PyObject); + add!(mptr, c"Fragment", typeref::FRAGMENT_TYPE.cast::()); - opt!(mptr, "OPT_APPEND_NEWLINE\0", opt::APPEND_NEWLINE); - opt!(mptr, "OPT_INDENT_2\0", opt::INDENT_2); - opt!(mptr, "OPT_NAIVE_UTC\0", opt::NAIVE_UTC); - opt!(mptr, "OPT_NON_STR_KEYS\0", opt::NON_STR_KEYS); - opt!(mptr, 
"OPT_OMIT_MICROSECONDS\0", opt::OMIT_MICROSECONDS); + opt!(mptr, c"OPT_APPEND_NEWLINE", opt::APPEND_NEWLINE); + opt!(mptr, c"OPT_INDENT_2", opt::INDENT_2); + opt!(mptr, c"OPT_NAIVE_UTC", opt::NAIVE_UTC); + opt!(mptr, c"OPT_NON_STR_KEYS", opt::NON_STR_KEYS); + opt!(mptr, c"OPT_OMIT_MICROSECONDS", opt::OMIT_MICROSECONDS); opt!( mptr, - "OPT_PASSTHROUGH_DATACLASS\0", + c"OPT_PASSTHROUGH_DATACLASS", opt::PASSTHROUGH_DATACLASS ); - opt!( - mptr, - "OPT_PASSTHROUGH_DATETIME\0", - opt::PASSTHROUGH_DATETIME - ); - opt!( - mptr, - "OPT_PASSTHROUGH_SUBCLASS\0", - opt::PASSTHROUGH_SUBCLASS - ); - opt!(mptr, "OPT_SERIALIZE_DATACLASS\0", opt::SERIALIZE_DATACLASS); - opt!(mptr, "OPT_SERIALIZE_NUMPY\0", opt::SERIALIZE_NUMPY); - opt!(mptr, "OPT_SERIALIZE_UUID\0", opt::SERIALIZE_UUID); - opt!(mptr, "OPT_SORT_KEYS\0", opt::SORT_KEYS); - opt!(mptr, "OPT_STRICT_INTEGER\0", opt::STRICT_INTEGER); - opt!(mptr, "OPT_UTC_Z\0", opt::UTC_Z); - - add!(mptr, "JSONDecodeError\0", typeref::JsonDecodeError); - add!(mptr, "JSONEncodeError\0", typeref::JsonEncodeError); + opt!(mptr, c"OPT_PASSTHROUGH_DATETIME", opt::PASSTHROUGH_DATETIME); + opt!(mptr, c"OPT_PASSTHROUGH_SUBCLASS", opt::PASSTHROUGH_SUBCLASS); + opt!(mptr, c"OPT_SERIALIZE_DATACLASS", opt::SERIALIZE_DATACLASS); + opt!(mptr, c"OPT_SERIALIZE_NUMPY", opt::SERIALIZE_NUMPY); + opt!(mptr, c"OPT_SERIALIZE_UUID", opt::SERIALIZE_UUID); + opt!(mptr, c"OPT_SORT_KEYS", opt::SORT_KEYS); + opt!(mptr, c"OPT_STRICT_INTEGER", opt::STRICT_INTEGER); + opt!(mptr, c"OPT_UTC_Z", opt::UTC_Z); + + add!(mptr, c"JSONDecodeError", typeref::JsonDecodeError); + add!(mptr, c"JSONEncodeError", typeref::JsonEncodeError); 0 } @@ -174,18 +215,18 @@ pub unsafe extern "C" fn PyInit_orjson() -> *mut PyModuleDef { unsafe { let mod_slots: Box<[PyModuleDef_Slot; PYMODULEDEF_LEN]> = Box::new([ PyModuleDef_Slot { - slot: Py_mod_exec, + slot: pyo3_ffi::Py_mod_exec, value: orjson_init_exec as *mut c_void, }, #[cfg(Py_3_12)] PyModuleDef_Slot { - slot: Py_mod_multiple_interpreters, - value: Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED, + slot: pyo3_ffi::Py_mod_multiple_interpreters, + value: pyo3_ffi::Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED, }, #[cfg(Py_3_13)] PyModuleDef_Slot { - slot: Py_mod_gil, - value: Py_MOD_GIL_USED, + slot: pyo3_ffi::Py_mod_gil, + value: pyo3_ffi::Py_MOD_GIL_USED, }, PyModuleDef_Slot { slot: 0, @@ -195,11 +236,11 @@ pub unsafe extern "C" fn PyInit_orjson() -> *mut PyModuleDef { let init = Box::new(PyModuleDef { m_base: PyModuleDef_HEAD_INIT, - m_name: "orjson\0".as_ptr() as *const c_char, + m_name: c"orjson".as_ptr(), m_doc: null(), m_size: 0, m_methods: null_mut(), - m_slots: Box::into_raw(mod_slots) as *mut PyModuleDef_Slot, + m_slots: Box::into_raw(mod_slots).cast::(), m_traverse: None, m_clear: None, m_free: None, @@ -214,19 +255,20 @@ pub unsafe extern "C" fn PyInit_orjson() -> *mut PyModuleDef { #[inline(never)] #[cfg_attr(feature = "optimize", optimize(size))] fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject { - let pos = err.pos(); - let msg = err.message; - let doc = match err.data { - Some(as_str) => unsafe { - PyUnicode_FromStringAndSize(as_str.as_ptr() as *const c_char, as_str.len() as isize) - }, - None => { - use_immortal!(crate::typeref::EMPTY_UNICODE) - } - }; unsafe { + let pos = err.pos(); + let msg = err.message; + let doc = match err.data { + Some(as_str) => PyUnicode_FromStringAndSize( + as_str.as_ptr().cast::(), + usize_to_isize(as_str.len()), + ), + None => { + use_immortal!(crate::typeref::EMPTY_UNICODE) + } + }; let err_msg = 
- PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); + PyUnicode_FromStringAndSize(msg.as_ptr().cast::(), usize_to_isize(msg.len())); let args = PyTuple_New(3); let pos = PyLong_FromLongLong(pos); PyTuple_SET_ITEM(args, 0, err_msg); @@ -235,7 +277,7 @@ fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject { PyErr_SetObject(typeref::JsonDecodeError, args); debug_assert!(ffi!(Py_REFCNT(args)) <= 2); Py_DECREF(args); - }; + } null_mut() } @@ -245,11 +287,11 @@ fn raise_loads_exception(err: deserialize::DeserializeError) -> *mut PyObject { fn raise_dumps_exception_fixed(msg: &str) -> *mut PyObject { unsafe { let err_msg = - PyUnicode_FromStringAndSize(msg.as_ptr() as *const c_char, msg.len() as isize); + PyUnicode_FromStringAndSize(msg.as_ptr().cast::(), usize_to_isize(msg.len())); PyErr_SetObject(typeref::JsonEncodeError, err_msg); debug_assert!(ffi!(Py_REFCNT(err_msg)) <= 2); Py_DECREF(err_msg); - }; + } null_mut() } @@ -259,20 +301,20 @@ fn raise_dumps_exception_fixed(msg: &str) -> *mut PyObject { #[cfg(Py_3_12)] fn raise_dumps_exception_dynamic(err: &str) -> *mut PyObject { unsafe { - let cause_exc: *mut PyObject = PyErr_GetRaisedException(); + let cause_exc: *mut PyObject = pyo3_ffi::PyErr_GetRaisedException(); let err_msg = - PyUnicode_FromStringAndSize(err.as_ptr() as *const c_char, err.len() as isize); + PyUnicode_FromStringAndSize(err.as_ptr().cast::(), usize_to_isize(err.len())); PyErr_SetObject(typeref::JsonEncodeError, err_msg); debug_assert!(ffi!(Py_REFCNT(err_msg)) <= 2); Py_DECREF(err_msg); if !cause_exc.is_null() { - let exc: *mut PyObject = PyErr_GetRaisedException(); - PyException_SetCause(exc, cause_exc); - PyErr_SetRaisedException(exc); + let exc: *mut PyObject = pyo3_ffi::PyErr_GetRaisedException(); + pyo3_ffi::PyException_SetCause(exc, cause_exc); + pyo3_ffi::PyErr_SetRaisedException(exc); } - }; + } null_mut() } @@ -285,30 +327,30 @@ fn raise_dumps_exception_dynamic(err: &str) -> *mut PyObject { let mut cause_tp: *mut PyObject = null_mut(); let mut cause_val: *mut PyObject = null_mut(); let mut cause_traceback: *mut PyObject = null_mut(); - PyErr_Fetch(&mut cause_tp, &mut cause_val, &mut cause_traceback); + pyo3_ffi::PyErr_Fetch(&mut cause_tp, &mut cause_val, &mut cause_traceback); let err_msg = - PyUnicode_FromStringAndSize(err.as_ptr() as *const c_char, err.len() as isize); + PyUnicode_FromStringAndSize(err.as_ptr().cast::(), usize_to_isize(err.len())); PyErr_SetObject(typeref::JsonEncodeError, err_msg); debug_assert!(ffi!(Py_REFCNT(err_msg)) == 2); Py_DECREF(err_msg); let mut tp: *mut PyObject = null_mut(); let mut val: *mut PyObject = null_mut(); let mut traceback: *mut PyObject = null_mut(); - PyErr_Fetch(&mut tp, &mut val, &mut traceback); - PyErr_NormalizeException(&mut tp, &mut val, &mut traceback); + pyo3_ffi::PyErr_Fetch(&mut tp, &mut val, &mut traceback); + pyo3_ffi::PyErr_NormalizeException(&mut tp, &mut val, &mut traceback); if !cause_tp.is_null() { - PyErr_NormalizeException(&mut cause_tp, &mut cause_val, &mut cause_traceback); - PyException_SetCause(val, cause_val); + pyo3_ffi::PyErr_NormalizeException(&mut cause_tp, &mut cause_val, &mut cause_traceback); + pyo3_ffi::PyException_SetCause(val, cause_val); Py_DECREF(cause_tp); } if !cause_traceback.is_null() { Py_DECREF(cause_traceback); } - PyErr_Restore(tp, val, traceback); - }; + pyo3_ffi::PyErr_Restore(tp, val, traceback); + } null_mut() } @@ -331,7 +373,7 @@ pub unsafe extern "C" fn dumps( let mut default: Option> = None; let mut optsptr: Option> = 
None; - let num_args = PyVectorcall_NARGS(nargs as usize); + let num_args = PyVectorcall_NARGS(isize_to_usize(nargs)); if unlikely!(num_args == 0) { return raise_dumps_exception_fixed( "dumps() missing 1 required positional argument: 'obj'", @@ -346,14 +388,14 @@ pub unsafe extern "C" fn dumps( if unlikely!(!kwnames.is_null()) { for i in 0..=Py_SIZE(kwnames).saturating_sub(1) { let arg = PyTuple_GET_ITEM(kwnames, i as Py_ssize_t); - if arg == typeref::DEFAULT { + if core::ptr::eq(arg, typeref::DEFAULT) { if unlikely!(num_args & 2 == 2) { return raise_dumps_exception_fixed( "dumps() got multiple values for argument: 'default'", ); } default = Some(NonNull::new_unchecked(*args.offset(num_args + i))); - } else if arg == typeref::OPTION { + } else if core::ptr::eq(arg, typeref::OPTION) { if unlikely!(num_args & 3 == 3) { return raise_dumps_exception_fixed( "dumps() got multiple values for argument: 'option'", @@ -371,16 +413,19 @@ pub unsafe extern "C" fn dumps( let mut optsbits: i32 = 0; if unlikely!(optsptr.is_some()) { let opts = optsptr.unwrap(); - if (*opts.as_ptr()).ob_type == typeref::INT_TYPE { - optsbits = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; + if core::ptr::eq((*opts.as_ptr()).ob_type, typeref::INT_TYPE) { + #[allow(clippy::cast_possible_truncation)] + let tmp = PyLong_AsLong(optsptr.unwrap().as_ptr()) as i32; // stmt_expr_attributes + optsbits = tmp; if unlikely!(!(0..=opt::MAX_OPT).contains(&optsbits)) { return raise_dumps_exception_fixed("Invalid opts"); } - } else if unlikely!(opts.as_ptr() != typeref::NONE) { + } else if unlikely!(!core::ptr::eq(opts.as_ptr(), typeref::NONE)) { return raise_dumps_exception_fixed("Invalid opts"); } } + #[allow(clippy::cast_sign_loss)] match crate::serialize::serialize(*args, default, optsbits as opt::Opt) { Ok(val) => val.as_ptr(), Err(err) => raise_dumps_exception_dynamic(err.as_str()), diff --git a/src/opt.rs b/src/opt.rs index 4ceff66b..fb0930b0 100644 --- a/src/opt.rs +++ b/src/opt.rs @@ -24,6 +24,7 @@ pub const SORT_OR_NON_STR_KEYS: Opt = SORT_KEYS | NON_STR_KEYS; pub const NOT_PASSTHROUGH: Opt = !(PASSTHROUGH_DATETIME | PASSTHROUGH_DATACLASS | PASSTHROUGH_SUBCLASS); +#[allow(clippy::cast_possible_wrap)] pub const MAX_OPT: i32 = (APPEND_NEWLINE | INDENT_2 | NAIVE_UTC diff --git a/src/serialize/buffer.rs b/src/serialize/buffer.rs index 894ee837..8b2bf73d 100644 --- a/src/serialize/buffer.rs +++ b/src/serialize/buffer.rs @@ -23,7 +23,7 @@ impl SmallFixedBuffer { pub unsafe fn as_mut_slice(&mut self) -> &mut [u8] { unsafe { core::slice::from_raw_parts_mut( - (core::ptr::addr_of_mut!(self.bytes) as *mut u8).add(self.idx), + (&raw mut self.bytes).cast::().add(self.idx), BUFFER_LENGTH - self.idx, ) } @@ -39,10 +39,7 @@ impl SmallFixedBuffer { pub fn push(&mut self, value: u8) { debug_assert!(self.idx + 1 < BUFFER_LENGTH); unsafe { - core::ptr::write( - (core::ptr::addr_of_mut!(self.bytes) as *mut u8).add(self.idx), - value, - ); + core::ptr::write((&raw mut self.bytes).cast::().add(self.idx), value); self.idx += 1; }; } @@ -53,7 +50,7 @@ impl SmallFixedBuffer { unsafe { core::ptr::copy_nonoverlapping( slice.as_ptr(), - (core::ptr::addr_of_mut!(self.bytes) as *mut u8).add(self.idx), + (&raw mut self.bytes).cast::().add(self.idx), slice.len(), ); self.idx += slice.len(); @@ -62,7 +59,7 @@ impl SmallFixedBuffer { #[inline] pub fn as_ptr(&self) -> *const u8 { - core::ptr::addr_of!(self.bytes) as *const u8 + (&raw const self.bytes).cast::() } #[inline] diff --git a/src/serialize/error.rs b/src/serialize/error.rs index f1329a30..49b6d626 
100644 --- a/src/serialize/error.rs +++ b/src/serialize/error.rs @@ -22,10 +22,10 @@ pub enum SerializeError { UnsupportedType(NonNull), } -impl std::fmt::Display for SerializeError { +impl core::fmt::Display for SerializeError { #[cold] #[cfg_attr(feature = "optimize", optimize(size))] - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result { match *self { SerializeError::DatetimeLibraryUnsupported => write!( f, @@ -63,7 +63,7 @@ impl std::fmt::Display for SerializeError { SerializeError::UnsupportedType(ptr) => { let name = unsafe { CStr::from_ptr((*ob_type!(ptr.as_ptr())).tp_name).to_string_lossy() }; - write!(f, "Type is not JSON serializable: {}", name) + write!(f, "Type is not JSON serializable: {name}") } } } diff --git a/src/serialize/mod.rs b/src/serialize/mod.rs index f044bcd2..a0b03357 100644 --- a/src/serialize/mod.rs +++ b/src/serialize/mod.rs @@ -6,6 +6,6 @@ mod obtype; mod per_type; mod serializer; mod state; -mod writer; +pub mod writer; pub use serializer::serialize; diff --git a/src/serialize/per_type/dataclass.rs b/src/serialize/per_type/dataclass.rs index a5dc707a..370ee9ad 100644 --- a/src/serialize/per_type/dataclass.rs +++ b/src/serialize/per_type/dataclass.rs @@ -8,6 +8,7 @@ use crate::str::unicode_to_str; use crate::typeref::{ DATACLASS_FIELDS_STR, DICT_STR, FIELD_TYPE, FIELD_TYPE_STR, SLOTS_STR, STR_TYPE, }; +use crate::util::isize_to_usize; use serde::ser::{Serialize, SerializeMap, Serializer}; @@ -88,7 +89,7 @@ impl Serialize for DataclassFastSerializer { where S: Serializer, { - let len = ffi!(Py_SIZE(self.ptr)) as usize; + let len = isize_to_usize(ffi!(Py_SIZE(self.ptr))); if unlikely!(len == 0) { return ZeroDictSerializer::new().serialize(serializer); } @@ -100,7 +101,7 @@ impl Serialize for DataclassFastSerializer { pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - for _ in 0..ffi!(Py_SIZE(self.ptr)) as usize { + for _ in 0..len { let key = next_key; let value = next_value; @@ -114,7 +115,7 @@ impl Serialize for DataclassFastSerializer { let tmp = unicode_to_str(key); if unlikely!(tmp.is_none()) { err!(SerializeError::InvalidStr) - }; + } tmp.unwrap() }; if unlikely!(key_as_str.as_bytes()[0] == b'_') { @@ -158,7 +159,7 @@ impl Serialize for DataclassFallbackSerializer { let fields = ffi!(PyObject_GetAttr(self.ptr, DATACLASS_FIELDS_STR)); debug_assert!(ffi!(Py_REFCNT(fields)) >= 2); ffi!(Py_DECREF(fields)); - let len = ffi!(Py_SIZE(fields)) as usize; + let len = isize_to_usize(ffi!(Py_SIZE(fields))); if unlikely!(len == 0) { return ZeroDictSerializer::new().serialize(serializer); } @@ -170,7 +171,7 @@ impl Serialize for DataclassFallbackSerializer { pydict_next!(fields, &mut pos, &mut next_key, &mut next_value); - for _ in 0..ffi!(Py_SIZE(fields)) as usize { + for _ in 0..len { let attr = next_key; let field = next_value; @@ -179,7 +180,7 @@ impl Serialize for DataclassFallbackSerializer { let field_type = ffi!(PyObject_GetAttr(field, FIELD_TYPE_STR)); debug_assert!(ffi!(Py_REFCNT(field_type)) >= 2); ffi!(Py_DECREF(field_type)); - if unsafe { field_type as *mut pyo3_ffi::PyTypeObject != FIELD_TYPE } { + if unsafe { !core::ptr::eq(field_type.cast::(), FIELD_TYPE) } { continue; } @@ -187,7 +188,7 @@ impl Serialize for DataclassFallbackSerializer { let tmp = unicode_to_str(attr); if unlikely!(tmp.is_none()) { err!(SerializeError::InvalidStr) - }; + } tmp.unwrap() }; if key_as_str.as_bytes()[0] == b'_' { @@ -200,7 +201,7 @@ impl Serialize for 
DataclassFallbackSerializer { let pyvalue = PyObjectSerializer::new(value, self.state, self.default); map.serialize_key(key_as_str).unwrap(); - map.serialize_value(&pyvalue)? + map.serialize_value(&pyvalue)?; } map.end() } diff --git a/src/serialize/per_type/datetime.rs b/src/serialize/per_type/datetime.rs index 634c40fc..0cad0ad9 100644 --- a/src/serialize/per_type/datetime.rs +++ b/src/serialize/per_type/datetime.rs @@ -1,12 +1,12 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::*; +use crate::opt::{Opt, OMIT_MICROSECONDS}; use crate::serialize::buffer::SmallFixedBuffer; use crate::serialize::error::SerializeError; use crate::serialize::per_type::datetimelike::{DateTimeError, DateTimeLike, Offset}; -#[cfg(Py_3_9)] -use crate::typeref::ZONEINFO_TYPE; -use crate::typeref::{CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR}; +use crate::typeref::{ + CONVERT_METHOD_STR, DST_STR, NORMALIZE_METHOD_STR, UTCOFFSET_METHOD_STR, ZONEINFO_TYPE, +}; use serde::ser::{Serialize, Serializer}; macro_rules! write_double_digit { @@ -55,13 +55,19 @@ impl Date { } buf.push(b'-'); { - let month = ffi!(PyDateTime_GET_MONTH(self.ptr)) as u32; - write_double_digit!(buf, month); + let val_py = ffi!(PyDateTime_GET_MONTH(self.ptr)); + debug_assert!(val_py >= 0); + #[allow(clippy::cast_sign_loss)] + let val = val_py as u32; + write_double_digit!(buf, val); } buf.push(b'-'); { - let day = ffi!(PyDateTime_GET_DAY(self.ptr)) as u32; - write_double_digit!(buf, day); + let val_py = ffi!(PyDateTime_GET_DAY(self.ptr)); + debug_assert!(val_py >= 0); + #[allow(clippy::cast_sign_loss)] + let val = val_py as u32; + write_double_digit!(buf, val); } } } @@ -95,7 +101,7 @@ impl Time { #[inline(never)] pub fn write_buf(&self, buf: &mut SmallFixedBuffer) -> Result<(), TimeError> { - if unsafe { (*(self.ptr as *mut pyo3_ffi::PyDateTime_Time)).hastzinfo == 1 } { + if unsafe { (*self.ptr.cast::()).hastzinfo == 1 } { return Err(TimeError::HasTimezone); } let hour = ffi!(PyDateTime_TIME_GET_HOUR(self.ptr)) as u8; @@ -122,7 +128,7 @@ impl Serialize for Time { let mut buf = SmallFixedBuffer::new(); if self.write_buf(&mut buf).is_err() { err!(SerializeError::DatetimeLibraryUnsupported) - }; + } serializer.serialize_unit_struct(str_from_slice!(buf.as_ptr(), buf.len())) } } @@ -144,7 +150,11 @@ impl DateTime { macro_rules! 
pydatetime_get { ($fn: ident, $pyfn: ident, $ty: ident) => { fn $fn(&self) -> $ty { - ffi!($pyfn(self.ptr)) as $ty + let ret = ffi!($pyfn(self.ptr)); + debug_assert!(ret >= 0); + #[allow(clippy::cast_sign_loss)] + let ret2 = ret as $ty; // stmt_expr_attributes + ret2 } }; } @@ -163,9 +173,10 @@ impl DateTimeLike for DateTime { } fn has_tz(&self) -> bool { - unsafe { (*(self.ptr as *mut pyo3_ffi::PyDateTime_DateTime)).hastzinfo == 1 } + unsafe { (*(self.ptr.cast::())).hastzinfo == 1 } } + #[inline(never)] fn slow_offset(&self) -> Result { let tzinfo = ffi!(PyDateTime_DATE_GET_TZINFO(self.ptr)); if ffi!(PyObject_HasAttr(tzinfo, CONVERT_METHOD_STR)) == 1 { @@ -202,13 +213,13 @@ impl DateTimeLike for DateTime { } } - #[cfg(Py_3_9)] + #[inline] fn offset(&self) -> Result { if !self.has_tz() { Ok(Offset::default()) } else { let tzinfo = ffi!(PyDateTime_DATE_GET_TZINFO(self.ptr)); - if unsafe { ob_type!(tzinfo) == ZONEINFO_TYPE } { + if unsafe { core::ptr::eq(ob_type!(tzinfo), ZONEINFO_TYPE) } { // zoneinfo let py_offset = call_method!(tzinfo, UTCOFFSET_METHOD_STR, self.ptr); let offset = Offset { @@ -222,15 +233,6 @@ impl DateTimeLike for DateTime { } } } - - #[cfg(not(Py_3_9))] - fn offset(&self) -> Result { - if !self.has_tz() { - Ok(Offset::default()) - } else { - self.slow_offset() - } - } } impl Serialize for DateTime { diff --git a/src/serialize/per_type/datetimelike.rs b/src/serialize/per_type/datetimelike.rs index 2e24479b..17c3d144 100644 --- a/src/serialize/per_type/datetimelike.rs +++ b/src/serialize/per_type/datetimelike.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::*; +use crate::opt::{Opt, NAIVE_UTC, OMIT_MICROSECONDS, UTC_Z}; use crate::serialize::buffer::SmallFixedBuffer; @@ -60,7 +60,7 @@ pub trait DateTimeLike { /// Is the object time-zone aware? fn has_tz(&self) -> bool; - //// python3.8 or below implementation of offset() + //// Non-zoneinfo implementation of offset() fn slow_offset(&self) -> Result; /// The offset of the timezone. 
diff --git a/src/serialize/per_type/default.rs b/src/serialize/per_type/default.rs index 6c9e1d47..519dc6c4 100644 --- a/src/serialize/per_type/default.rs +++ b/src/serialize/per_type/default.rs @@ -35,11 +35,14 @@ impl<'a> Serialize for DefaultSerializer<'a> { core::ptr::null_mut() as *mut pyo3_ffi::PyObject )); #[cfg(Py_3_10)] + #[allow(clippy::cast_sign_loss)] + let nargs = ffi!(PyVectorcall_NARGS(1)) as usize; + #[cfg(Py_3_10)] let default_obj = unsafe { pyo3_ffi::PyObject_Vectorcall( callable.as_ptr(), - core::ptr::addr_of!(self.previous.ptr), - pyo3_ffi::PyVectorcall_NARGS(1) as usize, + &raw const self.previous.ptr, + nargs, core::ptr::null_mut(), ) }; diff --git a/src/serialize/per_type/dict.rs b/src/serialize/per_type/dict.rs index b5e6cb3b..e2e04f62 100644 --- a/src/serialize/per_type/dict.rs +++ b/src/serialize/per_type/dict.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::*; +use crate::opt::{NON_STR_KEYS, NOT_PASSTHROUGH, SORT_KEYS, SORT_OR_NON_STR_KEYS}; use crate::serialize::buffer::SmallFixedBuffer; use crate::serialize::error::SerializeError; use crate::serialize::obtype::{pyobject_to_obtype, ObType}; @@ -15,6 +15,7 @@ use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; use crate::str::{unicode_to_str, unicode_to_str_via_ffi}; use crate::typeref::{STR_TYPE, TRUE, VALUE_STR}; +use crate::util::isize_to_usize; use compact_str::CompactString; use core::ptr::NonNull; use serde::ser::{Serialize, SerializeMap, Serializer}; @@ -73,16 +74,17 @@ impl Serialize for DictGenericSerializer { ZeroDictSerializer::new().serialize(serializer) } else if likely!(opt_disabled!(self.state.opts(), SORT_OR_NON_STR_KEYS)) { unsafe { - core::mem::transmute::<&DictGenericSerializer, &Dict>(self).serialize(serializer) + (*(core::ptr::from_ref::(self)).cast::()) + .serialize(serializer) } } else if opt_enabled!(self.state.opts(), NON_STR_KEYS) { unsafe { - core::mem::transmute::<&DictGenericSerializer, &DictNonStrKey>(self) + (*(core::ptr::from_ref::(self)).cast::()) .serialize(serializer) } } else { unsafe { - core::mem::transmute::<&DictGenericSerializer, &DictSortedKey>(self) + (*(core::ptr::from_ref::(self)).cast::()) .serialize(serializer) } } @@ -223,7 +225,7 @@ impl Serialize for Dict { let mut map = serializer.serialize_map(None).unwrap(); - let len = ffi!(Py_SIZE(self.ptr)) as usize; + let len = isize_to_usize(ffi!(Py_SIZE(self.ptr))); assume!(len > 0); for _ in 0..len { @@ -241,7 +243,7 @@ impl Serialize for Dict { let tmp = unicode_to_str(key); if unlikely!(tmp.is_none()) { err!(SerializeError::InvalidStr) - }; + } tmp.unwrap() }; @@ -271,7 +273,7 @@ impl Serialize for DictSortedKey { pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - let len = ffi!(Py_SIZE(self.ptr)) as usize; + let len = isize_to_usize(ffi!(Py_SIZE(self.ptr))); assume!(len > 0); let mut items: SmallVec<[(&str, *mut pyo3_ffi::PyObject); 8]> = @@ -283,7 +285,7 @@ impl Serialize for DictSortedKey { pydict_next!(self.ptr, &mut pos, &mut next_key, &mut next_value); - if unlikely!(unsafe { ob_type!(key) != STR_TYPE }) { + if unlikely!(unsafe { !core::ptr::eq(ob_type!(key), STR_TYPE) }) { err!(SerializeError::KeyMustBeStr) } let data = unicode_to_str(key); @@ -327,6 +329,7 @@ fn non_str_str_subclass(key: *mut pyo3_ffi::PyObject) -> Result Result { let mut buf = SmallFixedBuffer::new(); @@ -364,6 +367,7 @@ fn non_str_time( Ok(CompactString::from(key_as_str)) } +#[allow(clippy::unnecessary_wraps)] #[inline(never)] fn 
non_str_uuid(key: *mut pyo3_ffi::PyObject) -> Result { let mut buf = SmallFixedBuffer::new(); @@ -372,6 +376,7 @@ fn non_str_uuid(key: *mut pyo3_ffi::PyObject) -> Result Result { @@ -383,6 +388,7 @@ fn non_str_float(key: *mut pyo3_ffi::PyObject) -> Result Result { let ival = ffi!(PyLong_AsLongLong(key)); @@ -417,7 +423,7 @@ impl DictNonStrKey { match pyobject_to_obtype(key, opts) { ObType::None => Ok(CompactString::const_new("null")), ObType::Bool => { - if unsafe { key == TRUE } { + if unsafe { core::ptr::eq(key, TRUE) } { Ok(CompactString::const_new("true")) } else { Ok(CompactString::const_new("false")) @@ -464,7 +470,7 @@ impl Serialize for DictNonStrKey { let opts = self.state.opts() & NOT_PASSTHROUGH; - let len = ffi!(Py_SIZE(self.ptr)) as usize; + let len = isize_to_usize(ffi!(Py_SIZE(self.ptr))); assume!(len > 0); let mut items: SmallVec<[(CompactString, *mut pyo3_ffi::PyObject); 8]> = diff --git a/src/serialize/per_type/fragment.rs b/src/serialize/per_type/fragment.rs index c3b2e74c..c8afa3b8 100644 --- a/src/serialize/per_type/fragment.rs +++ b/src/serialize/per_type/fragment.rs @@ -4,6 +4,7 @@ use crate::ffi::{Fragment, PyBytes_AS_STRING, PyBytes_GET_SIZE}; use crate::serialize::error::SerializeError; use crate::str::unicode_to_str; use crate::typeref::{BYTES_TYPE, STR_TYPE}; +use crate::util::isize_to_usize; use serde::ser::{Serialize, Serializer}; @@ -28,14 +29,14 @@ impl Serialize for FragmentSerializer { { let buffer: &[u8]; unsafe { - let fragment: *mut Fragment = self.ptr as *mut Fragment; + let fragment: *mut Fragment = self.ptr.cast::(); let ob_type = ob_type!((*fragment).contents); - if ob_type == BYTES_TYPE { + if core::ptr::eq(ob_type, BYTES_TYPE) { buffer = core::slice::from_raw_parts( - PyBytes_AS_STRING((*fragment).contents) as *const u8, - PyBytes_GET_SIZE((*fragment).contents) as usize, + PyBytes_AS_STRING((*fragment).contents).cast::(), + isize_to_usize(PyBytes_GET_SIZE((*fragment).contents)), ); - } else if ob_type == STR_TYPE { + } else if core::ptr::eq(ob_type, STR_TYPE) { let uni = unicode_to_str((*fragment).contents); if unlikely!(uni.is_none()) { err!(SerializeError::InvalidStr) diff --git a/src/serialize/per_type/int.rs b/src/serialize/per_type/int.rs index 75778cba..00ef9475 100644 --- a/src/serialize/per_type/int.rs +++ b/src/serialize/per_type/int.rs @@ -34,28 +34,41 @@ impl Serialize for IntSerializer { if crate::ffi::pylong_is_zero(self.ptr) { return serializer.serialize_bytes(b"0"); } - let is_signed = !crate::ffi::pylong_is_unsigned(self.ptr) as i32; + let is_signed = i32::from(!crate::ffi::pylong_is_unsigned(self.ptr)); if crate::ffi::pylong_fits_in_i32(self.ptr) { if is_signed == 0 { + #[allow(clippy::cast_sign_loss)] serializer.serialize_u64(crate::ffi::pylong_get_inline_value(self.ptr) as u64) } else { serializer.serialize_i64(crate::ffi::pylong_get_inline_value(self.ptr)) } } else { let mut buffer: [u8; 8] = [0; 8]; + + #[cfg(not(Py_3_13))] + let ret = pyo3_ffi::_PyLong_AsByteArray( + self.ptr.cast::(), + buffer.as_mut_ptr().cast::(), + 8, + 1, + is_signed, + ); + #[cfg(Py_3_13)] let ret = pyo3_ffi::_PyLong_AsByteArray( - self.ptr as *mut pyo3_ffi::PyLongObject, - buffer.as_mut_ptr() as *mut core::ffi::c_uchar, + self.ptr.cast::(), + buffer.as_mut_ptr().cast::(), 8, 1, is_signed, + 0, ); if unlikely!(ret == -1) { + #[cfg(not(Py_3_13))] ffi!(PyErr_Clear()); err!(SerializeError::Integer64Bits) } if is_signed == 0 { - let val = core::mem::transmute::<[u8; 8], u64>(buffer); + let val = u64::from_ne_bytes(buffer); if 
unlikely!(opt_enabled!(self.opts, STRICT_INTEGER)) && val > STRICT_INT_MAX as u64 { @@ -63,7 +76,7 @@ impl Serialize for IntSerializer { } serializer.serialize_u64(val) } else { - let val = core::mem::transmute::<[u8; 8], i64>(buffer); + let val = i64::from_ne_bytes(buffer); if unlikely!(opt_enabled!(self.opts, STRICT_INTEGER)) && !(STRICT_INT_MIN..=STRICT_INT_MAX).contains(&val) { diff --git a/src/serialize/per_type/list.rs b/src/serialize/per_type/list.rs index c866d21d..505840e2 100644 --- a/src/serialize/per_type/list.rs +++ b/src/serialize/per_type/list.rs @@ -9,7 +9,8 @@ use crate::serialize::per_type::{ }; use crate::serialize::serializer::PyObjectSerializer; use crate::serialize::state::SerializerState; -use crate::typeref::*; +use crate::typeref::{LIST_TYPE, TUPLE_TYPE}; +use crate::util::isize_to_usize; use core::ptr::NonNull; use serde::ser::{Serialize, SerializeSeq, Serializer}; @@ -49,8 +50,8 @@ impl ListTupleSerializer { is_type!(ob_type!(ptr), LIST_TYPE) || is_subclass_by_flag!(tp_flags!(ob_type!(ptr)), Py_TPFLAGS_LIST_SUBCLASS) ); - let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyListObject)).ob_item }; - let len = ffi!(Py_SIZE(ptr)) as usize; + let data_ptr = unsafe { (*ptr.cast::()).ob_item }; + let len = isize_to_usize(ffi!(Py_SIZE(ptr))); Self { data_ptr: data_ptr, len: len, @@ -68,8 +69,8 @@ impl ListTupleSerializer { is_type!(ob_type!(ptr), TUPLE_TYPE) || is_subclass_by_flag!(tp_flags!(ob_type!(ptr)), Py_TPFLAGS_TUPLE_SUBCLASS) ); - let data_ptr = unsafe { (*(ptr as *mut pyo3_ffi::PyTupleObject)).ob_item.as_ptr() }; - let len = ffi!(Py_SIZE(ptr)) as usize; + let data_ptr = unsafe { (*ptr.cast::()).ob_item.as_ptr() }; + let len = isize_to_usize(ffi!(Py_SIZE(ptr))); Self { data_ptr: data_ptr, len: len, diff --git a/src/serialize/per_type/numpy.rs b/src/serialize/per_type/numpy.rs index 0844292b..f888029f 100644 --- a/src/serialize/per_type/numpy.rs +++ b/src/serialize/per_type/numpy.rs @@ -1,5 +1,6 @@ -use crate::opt::*; +// SPDX-License-Identifier: (Apache-2.0 OR MIT) +use crate::opt::Opt; use crate::serialize::buffer::SmallFixedBuffer; use crate::serialize::error::SerializeError; use crate::serialize::per_type::{ @@ -7,10 +8,11 @@ use crate::serialize::per_type::{ }; use crate::serialize::serializer::PyObjectSerializer; use crate::typeref::{load_numpy_types, ARRAY_STRUCT_STR, DESCR_STR, DTYPE_STR, NUMPY_TYPES}; +use crate::util::isize_to_usize; use core::ffi::{c_char, c_int, c_void}; use jiff::civil::DateTime; use jiff::Timestamp; -use pyo3_ffi::*; +use pyo3_ffi::{PyObject, PyTypeObject, Py_intptr_t, Py_ssize_t}; use serde::ser::{self, Serialize, SerializeSeq, Serializer}; use std::fmt; @@ -36,7 +38,7 @@ impl<'a> Serialize for NumpySerializer<'a> { match NumpyArray::new(self.previous.ptr, self.previous.state.opts()) { Ok(val) => val.serialize(serializer), Err(PyArrayError::Malformed) => err!(SerializeError::NumpyMalformed), - Err(PyArrayError::NotContiguous) | Err(PyArrayError::UnsupportedDataType) + Err(PyArrayError::NotContiguous | PyArrayError::UnsupportedDataType) if self.previous.default.is_some() => { DefaultSerializer::new(self.previous).serialize(serializer) @@ -67,19 +69,19 @@ pub fn is_numpy_scalar(ob_type: *mut PyTypeObject) -> bool { false } else { let scalar_types = unsafe { numpy_types.unwrap().as_ref() }; - ob_type == scalar_types.float64 - || ob_type == scalar_types.float32 - || ob_type == scalar_types.float16 - || ob_type == scalar_types.int64 - || ob_type == scalar_types.int16 - || ob_type == scalar_types.int32 - || ob_type == scalar_types.int8 - 
|| ob_type == scalar_types.uint64 - || ob_type == scalar_types.uint32 - || ob_type == scalar_types.uint8 - || ob_type == scalar_types.uint16 - || ob_type == scalar_types.bool_ - || ob_type == scalar_types.datetime64 + core::ptr::eq(ob_type, scalar_types.float64) + || core::ptr::eq(ob_type, scalar_types.float32) + || core::ptr::eq(ob_type, scalar_types.float16) + || core::ptr::eq(ob_type, scalar_types.int64) + || core::ptr::eq(ob_type, scalar_types.int16) + || core::ptr::eq(ob_type, scalar_types.int32) + || core::ptr::eq(ob_type, scalar_types.int8) + || core::ptr::eq(ob_type, scalar_types.uint64) + || core::ptr::eq(ob_type, scalar_types.uint32) + || core::ptr::eq(ob_type, scalar_types.uint8) + || core::ptr::eq(ob_type, scalar_types.uint16) + || core::ptr::eq(ob_type, scalar_types.bool_) + || core::ptr::eq(ob_type, scalar_types.datetime64) } } @@ -90,7 +92,7 @@ pub fn is_numpy_array(ob_type: *mut PyTypeObject) -> bool { false } else { let scalar_types = unsafe { numpy_types.unwrap().as_ref() }; - unsafe { ob_type == scalar_types.array } + unsafe { core::ptr::eq(ob_type, scalar_types.array) } } } @@ -193,7 +195,11 @@ impl NumpyArray { #[cfg_attr(feature = "optimize", optimize(size))] pub fn new(ptr: *mut PyObject, opts: Opt) -> Result { let capsule = ffi!(PyObject_GetAttr(ptr, ARRAY_STRUCT_STR)); - let array = unsafe { (*(capsule as *mut PyCapsule)).pointer as *mut PyArrayInterface }; + let array = unsafe { + (*capsule.cast::()) + .pointer + .cast::() + }; if unsafe { (*array).two != 2 } { ffi!(Py_DECREF(capsule)); Err(PyArrayError::Malformed) @@ -204,6 +210,8 @@ impl NumpyArray { ffi!(Py_DECREF(capsule)); Err(PyArrayError::NotNativeEndian) } else { + debug_assert!(unsafe { (*array).nd >= 0 }); + #[allow(clippy::cast_sign_loss)] let num_dimensions = unsafe { (*array).nd as usize }; if num_dimensions == 0 { ffi!(Py_DECREF(capsule)); @@ -220,7 +228,7 @@ impl NumpyArray { position: vec![0; num_dimensions], children: Vec::with_capacity(num_dimensions), depth: 0, - capsule: capsule as *mut PyCapsule, + capsule: capsule.cast::(), kind: kind, opts, }; @@ -252,15 +260,15 @@ impl NumpyArray { fn build(&mut self) { if self.depth < self.dimensions() - 1 { for i in 0..self.shape()[self.depth] { - let mut position: Vec = self.position.to_vec(); + let mut position: Vec = self.position.clone(); position[self.depth] = i; let num_children: usize = if self.depth < self.dimensions() - 2 { - self.shape()[self.depth + 1] as usize + isize_to_usize(self.shape()[self.depth + 1]) } else { 0 }; self.children - .push(self.child_from_parent(position, num_children)) + .push(self.child_from_parent(position, num_children)); } } } @@ -278,27 +286,30 @@ impl NumpyArray { } fn num_items(&self) -> usize { - self.shape()[self.shape().len() - 1] as usize + isize_to_usize(self.shape()[self.shape().len() - 1]) } fn dimensions(&self) -> usize { - unsafe { (*self.array).nd as usize } + #[allow(clippy::cast_sign_loss)] + unsafe { + (*self.array).nd as usize + } } fn shape(&self) -> &[isize] { - slice!((*self.array).shape as *const isize, self.dimensions()) + slice!((*self.array).shape.cast_const(), self.dimensions()) } fn strides(&self) -> &[isize] { - slice!((*self.array).strides as *const isize, self.dimensions()) + slice!((*self.array).strides.cast_const(), self.dimensions()) } } impl Drop for NumpyArray { fn drop(&mut self) { if self.depth == 0 { - ffi!(Py_DECREF(self.array as *mut pyo3_ffi::PyObject)); - ffi!(Py_DECREF(self.capsule as *mut pyo3_ffi::PyObject)); + ffi!(Py_DECREF(self.array.cast::())); + 
ffi!(Py_DECREF(self.capsule.cast::())); } } } @@ -322,55 +333,55 @@ impl Serialize for NumpyArray { } else { match self.kind { ItemType::F64 => { - NumpyF64Array::new(slice!(self.data() as *const f64, self.num_items())) + NumpyF64Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::F32 => { - NumpyF32Array::new(slice!(self.data() as *const f32, self.num_items())) + NumpyF32Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::F16 => { - NumpyF16Array::new(slice!(self.data() as *const u16, self.num_items())) + NumpyF16Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::U64 => { - NumpyU64Array::new(slice!(self.data() as *const u64, self.num_items())) + NumpyU64Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::U32 => { - NumpyU32Array::new(slice!(self.data() as *const u32, self.num_items())) + NumpyU32Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::U16 => { - NumpyU16Array::new(slice!(self.data() as *const u16, self.num_items())) + NumpyU16Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::U8 => { - NumpyU8Array::new(slice!(self.data() as *const u8, self.num_items())) + NumpyU8Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::I64 => { - NumpyI64Array::new(slice!(self.data() as *const i64, self.num_items())) + NumpyI64Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::I32 => { - NumpyI32Array::new(slice!(self.data() as *const i32, self.num_items())) + NumpyI32Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::I16 => { - NumpyI16Array::new(slice!(self.data() as *const i16, self.num_items())) + NumpyI16Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::I8 => { - NumpyI8Array::new(slice!(self.data() as *const i8, self.num_items())) + NumpyI8Array::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::BOOL => { - NumpyBoolArray::new(slice!(self.data() as *const u8, self.num_items())) + NumpyBoolArray::new(slice!(self.data().cast::(), self.num_items())) .serialize(serializer) } ItemType::DATETIME64(unit) => NumpyDatetime64Array::new( - slice!(self.data() as *const i64, self.num_items()), + slice!(self.data().cast::(), self.num_items()), unit, self.opts, ) @@ -624,7 +635,7 @@ impl Serialize for DataTypeU16 { where S: Serializer, { - serializer.serialize_u32(self.obj as u32) + serializer.serialize_u32(u32::from(self.obj)) } } @@ -747,7 +758,7 @@ impl Serialize for DataTypeI16 { where S: Serializer, { - serializer.serialize_i32(self.obj as i32) + serializer.serialize_i32(i32::from(self.obj)) } } @@ -788,7 +799,7 @@ impl Serialize for DataTypeI8 { where S: Serializer, { - serializer.serialize_i32(self.obj as i32) + serializer.serialize_i32(i32::from(self.obj)) } } @@ -829,7 +840,7 @@ impl Serialize for DataTypeU8 { where S: Serializer, { - serializer.serialize_u32(self.obj as u32) + serializer.serialize_u32(u32::from(self.obj)) } } @@ -896,33 +907,33 @@ impl Serialize for NumpyScalar { let ob_type = ob_type!(self.ptr); let scalar_types = unsafe { NUMPY_TYPES.get_or_init(load_numpy_types).unwrap().as_ref() }; - if ob_type == scalar_types.float64 { - (*(self.ptr as *mut NumpyFloat64)).serialize(serializer) - } else if ob_type == scalar_types.float32 
{ - (*(self.ptr as *mut NumpyFloat32)).serialize(serializer) - } else if ob_type == scalar_types.float16 { - (*(self.ptr as *mut NumpyFloat16)).serialize(serializer) - } else if ob_type == scalar_types.int64 { - (*(self.ptr as *mut NumpyInt64)).serialize(serializer) - } else if ob_type == scalar_types.int32 { - (*(self.ptr as *mut NumpyInt32)).serialize(serializer) - } else if ob_type == scalar_types.int16 { - (*(self.ptr as *mut NumpyInt16)).serialize(serializer) - } else if ob_type == scalar_types.int8 { - (*(self.ptr as *mut NumpyInt8)).serialize(serializer) - } else if ob_type == scalar_types.uint64 { - (*(self.ptr as *mut NumpyUint64)).serialize(serializer) - } else if ob_type == scalar_types.uint32 { - (*(self.ptr as *mut NumpyUint32)).serialize(serializer) - } else if ob_type == scalar_types.uint16 { - (*(self.ptr as *mut NumpyUint16)).serialize(serializer) - } else if ob_type == scalar_types.uint8 { - (*(self.ptr as *mut NumpyUint8)).serialize(serializer) - } else if ob_type == scalar_types.bool_ { - (*(self.ptr as *mut NumpyBool)).serialize(serializer) - } else if ob_type == scalar_types.datetime64 { + if core::ptr::eq(ob_type, scalar_types.float64) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.float32) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.float16) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.int64) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.int32) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.int16) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.int8) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.uint64) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.uint32) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.uint16) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.uint8) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.bool_) { + (*(self.ptr.cast::())).serialize(serializer) + } else if core::ptr::eq(ob_type, scalar_types.datetime64) { let unit = NumpyDatetimeUnit::from_pyobject(self.ptr); - let obj = &*(self.ptr as *mut NumpyDatetime64); + let obj = &*self.ptr.cast::(); let dt = unit .datetime(obj.value, self.opts) .map_err(NumpyDateTimeError::into_serde_err)?; @@ -947,7 +958,7 @@ impl Serialize for NumpyInt8 { where S: Serializer, { - serializer.serialize_i32(self.value as i32) + serializer.serialize_i32(i32::from(self.value)) } } @@ -964,7 +975,7 @@ impl Serialize for NumpyInt16 { where S: Serializer, { - serializer.serialize_i32(self.value as i32) + serializer.serialize_i32(i32::from(self.value)) } } @@ -1015,7 +1026,7 @@ impl Serialize for NumpyUint8 { where S: Serializer, { - serializer.serialize_u32(self.value as u32) + serializer.serialize_u32(u32::from(self.value)) } } @@ -1032,7 +1043,7 @@ impl Serialize for NumpyUint16 { where S: Serializer, { - serializer.serialize_u32(self.value as u32) + serializer.serialize_u32(u32::from(self.value)) } } @@ -1182,7 +1193,7 @@ impl fmt::Display for NumpyDatetimeUnit { Self::Attoseconds => "attoseconds", Self::Generic => "generic", }; - write!(f, "{}", unit) + write!(f, "{unit}") } } 
@@ -1196,9 +1207,9 @@ impl NumpyDateTimeError { #[cold] fn into_serde_err(self) -> T { let err = match self { - Self::UnsupportedUnit(unit) => format!("unsupported numpy.datetime64 unit: {}", unit), + Self::UnsupportedUnit(unit) => format!("unsupported numpy.datetime64 unit: {unit}"), Self::Unrepresentable { unit, val } => { - format!("unrepresentable numpy.datetime64: {} {}", val, unit) + format!("unrepresentable numpy.datetime64: {val} {unit}") } }; ser::Error::custom(err) @@ -1209,7 +1220,7 @@ macro_rules! to_jiff_datetime { ($timestamp:expr, $self:expr, $val:expr) => { Ok( ($timestamp.map_err(|_| NumpyDateTimeError::Unrepresentable { - unit: *$self, + unit: $self, val: $val, })?) .to_zoned(jiff::tz::TimeZone::UTC) @@ -1268,12 +1279,12 @@ impl NumpyDatetimeUnit { /// Returns an `Err(NumpyDateTimeError)` if the value is invalid for this unit. #[cold] #[cfg_attr(feature = "optimize", optimize(size))] - fn datetime(&self, val: i64, opts: Opt) -> Result { + fn datetime(self, val: i64, opts: Opt) -> Result { match self { Self::Years => Ok(DateTime::new( (val + 1970) .try_into() - .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: *self, val })?, + .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: self, val })?, 1, 1, 0, @@ -1285,10 +1296,10 @@ impl NumpyDatetimeUnit { Self::Months => Ok(DateTime::new( (val / 12 + 1970) .try_into() - .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: *self, val })?, + .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: self, val })?, (val % 12 + 1) .try_into() - .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: *self, val })?, + .map_err(|_| NumpyDateTimeError::Unrepresentable { unit: self, val })?, 1, 0, 0, @@ -1306,9 +1317,9 @@ impl NumpyDatetimeUnit { Self::Milliseconds => to_jiff_datetime!(Timestamp::from_millisecond(val), self, val), Self::Microseconds => to_jiff_datetime!(Timestamp::from_microsecond(val), self, val), Self::Nanoseconds => { - to_jiff_datetime!(Timestamp::from_nanosecond(val as i128), self, val) + to_jiff_datetime!(Timestamp::from_nanosecond(i128::from(val)), self, val) } - _ => Err(NumpyDateTimeError::UnsupportedUnit(*self)), + _ => Err(NumpyDateTimeError::UnsupportedUnit(self)), } .map(|dt| NumpyDatetime64Repr { dt, opts }) } @@ -1354,7 +1365,10 @@ pub struct NumpyDatetime64 { macro_rules! 
forward_inner { ($meth: ident, $ty: ident) => { fn $meth(&self) -> $ty { - self.dt.$meth() as $ty + debug_assert!(self.dt.$meth() >= 0); + #[allow(clippy::cast_sign_loss)] + let ret = self.dt.$meth() as $ty; // stmt_expr_attributes + ret } }; } @@ -1373,7 +1387,10 @@ impl DateTimeLike for NumpyDatetime64Repr { forward_inner!(second, u8); fn nanosecond(&self) -> u32 { - self.dt.subsec_nanosecond() as u32 + debug_assert!(self.dt.subsec_nanosecond() >= 0); + #[allow(clippy::cast_sign_loss)] + let ret = self.dt.subsec_nanosecond() as u32; // stmt_expr_attributes + ret } fn microsecond(&self) -> u32 { diff --git a/src/serialize/per_type/pybool.rs b/src/serialize/per_type/pybool.rs index 6109693b..f6c495cf 100644 --- a/src/serialize/per_type/pybool.rs +++ b/src/serialize/per_type/pybool.rs @@ -19,6 +19,6 @@ impl Serialize for BoolSerializer { where S: Serializer, { - serializer.serialize_bool(unsafe { self.ptr == crate::typeref::TRUE }) + serializer.serialize_bool(unsafe { core::ptr::eq(self.ptr, crate::typeref::TRUE) }) } } diff --git a/src/serialize/per_type/unicode.rs b/src/serialize/per_type/unicode.rs index ecc8ae6c..23270916 100644 --- a/src/serialize/per_type/unicode.rs +++ b/src/serialize/per_type/unicode.rs @@ -26,7 +26,7 @@ impl Serialize for StrSerializer { let tmp = unicode_to_str(self.ptr); if unlikely!(tmp.is_none()) { err!(SerializeError::InvalidStr) - }; + } tmp.unwrap() }; serializer.serialize_str(uni) diff --git a/src/serialize/per_type/uuid.rs b/src/serialize/per_type/uuid.rs index f065fd2a..12e5bb78 100644 --- a/src/serialize/per_type/uuid.rs +++ b/src/serialize/per_type/uuid.rs @@ -22,16 +22,26 @@ impl UUID { // test_uuid_immutable, test_uuid_int let py_int = ffi!(PyObject_GetAttr(self.ptr, INT_ATTR_STR)); ffi!(Py_DECREF(py_int)); - let buffer: [c_uchar; 16] = [0; 16]; + let mut buffer: [c_uchar; 16] = [0; 16]; unsafe { // test_uuid_overflow + #[cfg(not(Py_3_13))] pyo3_ffi::_PyLong_AsByteArray( - py_int as *mut pyo3_ffi::PyLongObject, - buffer.as_ptr() as *mut c_uchar, + py_int.cast::(), + buffer.as_mut_ptr(), 16, 1, // little_endian 0, // is_signed - ) + ); + #[cfg(Py_3_13)] + pyo3_ffi::_PyLong_AsByteArray( + py_int.cast::(), + buffer.as_mut_ptr(), + 16, + 1, // little_endian + 0, // is_signed + 0, + ); }; value = u128::from_le_bytes(buffer); } diff --git a/src/serialize/serializer.rs b/src/serialize/serializer.rs index 852d31e0..7b9a5de5 100644 --- a/src/serialize/serializer.rs +++ b/src/serialize/serializer.rs @@ -27,7 +27,7 @@ pub fn serialize( to_writer_pretty(&mut buf, &obj) }; match res { - Ok(_) => { + Ok(()) => { if opt_enabled!(opts, APPEND_NEWLINE) { let _ = buf.write(b"\n"); } diff --git a/src/serialize/state.rs b/src/serialize/state.rs index 91b5b0aa..81094731 100644 --- a/src/serialize/state.rs +++ b/src/serialize/state.rs @@ -1,6 +1,6 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::opt::*; +use crate::opt::Opt; const RECURSION_SHIFT: usize = 24; const RECURSION_MASK: u32 = 255 << RECURSION_SHIFT; @@ -20,27 +20,27 @@ pub struct SerializerState { impl SerializerState { #[inline(always)] pub fn new(opts: Opt) -> Self { - debug_assert!(opts < u16::MAX as u32); + debug_assert!(opts < u32::from(u16::MAX)); Self { state: opts } } #[inline(always)] - pub fn opts(&self) -> u32 { + pub fn opts(self) -> u32 { self.state } #[inline(always)] - pub fn recursion_limit(&self) -> bool { + pub fn recursion_limit(self) -> bool { self.state & RECURSION_MASK == RECURSION_MASK } #[inline(always)] - pub fn default_calls_limit(&self) -> bool { + pub fn 
default_calls_limit(self) -> bool { self.state & DEFAULT_MASK == DEFAULT_MASK } #[inline(always)] - pub fn copy_for_recursive_call(&self) -> Self { + pub fn copy_for_recursive_call(self) -> Self { let opt = self.state & !RECURSION_MASK; let recursion = (((self.state & RECURSION_MASK) >> RECURSION_SHIFT) + 1) << RECURSION_SHIFT; Self { @@ -49,7 +49,7 @@ impl SerializerState { } #[inline(always)] - pub fn copy_for_default_call(&self) -> Self { + pub fn copy_for_default_call(self) -> Self { let opt = self.state & !DEFAULT_MASK; let default_calls = (((self.state & DEFAULT_MASK) >> DEFAULT_SHIFT) + 1) << DEFAULT_SHIFT; Self { diff --git a/src/serialize/writer/byteswriter.rs b/src/serialize/writer/byteswriter.rs index dd121838..b2d13d78 100644 --- a/src/serialize/writer/byteswriter.rs +++ b/src/serialize/writer/byteswriter.rs @@ -1,10 +1,8 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use core::ffi::c_char; +use crate::util::usize_to_isize; use core::ptr::NonNull; -use pyo3_ffi::{ - PyBytesObject, PyBytes_FromStringAndSize, PyObject, PyVarObject, Py_ssize_t, _PyBytes_Resize, -}; +use pyo3_ffi::{PyBytesObject, PyBytes_FromStringAndSize, PyObject, PyVarObject, _PyBytes_Resize}; use std::io::Error; const BUFFER_LENGTH: usize = 1024; @@ -21,43 +19,36 @@ impl BytesWriter { cap: BUFFER_LENGTH, len: 0, bytes: unsafe { - PyBytes_FromStringAndSize(core::ptr::null_mut(), BUFFER_LENGTH as isize) - as *mut PyBytesObject + PyBytes_FromStringAndSize(core::ptr::null_mut(), usize_to_isize(BUFFER_LENGTH)) + .cast::<PyBytesObject>() }, } } pub fn bytes_ptr(&mut self) -> NonNull<PyObject> { - unsafe { NonNull::new_unchecked(self.bytes as *mut PyObject) } + unsafe { NonNull::new_unchecked(self.bytes.cast::<PyObject>()) } } pub fn finish(&mut self) -> NonNull<PyObject> { unsafe { core::ptr::write(self.buffer_ptr(), 0); - (*self.bytes.cast::<PyVarObject>()).ob_size = self.len as Py_ssize_t; + (*self.bytes.cast::<PyVarObject>()).ob_size = usize_to_isize(self.len); self.resize(self.len); self.bytes_ptr() } } fn buffer_ptr(&self) -> *mut u8 { - unsafe { - core::mem::transmute::<*mut [c_char; 1], *mut u8>(core::ptr::addr_of_mut!( - (*self.bytes).ob_sval - )) - .add(self.len) - } + unsafe { (&raw mut (*self.bytes).ob_sval).cast::<u8>().add(self.len) } } #[inline] pub fn resize(&mut self, len: usize) { self.cap = len; unsafe { - #[allow(clippy::unnecessary_cast)] _PyBytes_Resize( - core::ptr::addr_of_mut!(self.bytes) as *mut *mut PyBytesObject - as *mut *mut PyObject, - len as isize, + (&raw mut self.bytes).cast::<*mut PyObject>(), + usize_to_isize(len), ); } } diff --git a/src/serialize/writer/json.rs b/src/serialize/writer/json.rs index cbb5fb33..c893d5e5 100644 --- a/src/serialize/writer/json.rs +++ b/src/serialize/writer/json.rs @@ -2,7 +2,6 @@ // This is an adaptation of `src/value/ser.rs` from serde-json. use crate::serialize::writer::formatter::{CompactFormatter, Formatter, PrettyFormatter}; -use crate::serialize::writer::str::*; use crate::serialize::writer::WriteExt; use serde::ser::{self, Impossible, Serialize}; use serde_json::error::{Error, Result}; @@ -572,7 +571,23 @@ macro_rules!
reserve_str { }; } -#[cfg(all(feature = "unstable-simd", not(target_arch = "x86_64")))] +#[cfg(all(target_arch = "x86_64", feature = "avx512"))] +type StrFormatter = unsafe fn(*mut u8, *const u8, usize) -> usize; + +#[cfg(all(target_arch = "x86_64", feature = "avx512"))] +static mut STR_FORMATTER_FN: StrFormatter = + crate::serialize::writer::str::format_escaped_str_impl_sse2_128; + +pub fn set_str_formatter_fn() { + unsafe { + #[cfg(all(target_arch = "x86_64", feature = "avx512"))] + if std::is_x86_feature_detected!("avx512vl") { + STR_FORMATTER_FN = crate::serialize::writer::str::format_escaped_str_impl_512vl; + } + } +} + +#[cfg(all(target_arch = "x86_64", not(feature = "avx512")))] #[inline(always)] fn format_escaped_str<W>(writer: &mut W, value: &str) where @@ -581,7 +596,7 @@ where unsafe { reserve_str!(writer, value); - let written = format_escaped_str_impl_generic_128( + let written = crate::serialize::writer::str::format_escaped_str_impl_sse2_128( writer.as_mut_buffer_ptr(), value.as_bytes().as_ptr(), value.len(), @@ -591,11 +606,7 @@ where } } -#[cfg(all( - feature = "unstable-simd", - target_arch = "x86_64", - not(feature = "avx512") -))] +#[cfg(all(target_arch = "x86_64", feature = "avx512"))] #[inline(always)] fn format_escaped_str<W>(writer: &mut W, value: &str) where @@ -604,7 +615,7 @@ where unsafe { reserve_str!(writer, value); - let written = format_escaped_str_impl_sse2_128( + let written = STR_FORMATTER_FN( writer.as_mut_buffer_ptr(), value.as_bytes().as_ptr(), value.len(), @@ -614,34 +625,11 @@ where } } -#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))] -#[inline(always)] -fn format_escaped_str<W>(writer: &mut W, value: &str) -where - W: ?Sized + io::Write + WriteExt, -{ - unsafe { - reserve_str!(writer, value); - - if std::is_x86_feature_detected!("avx512vl") { - let written = format_escaped_str_impl_512vl( - writer.as_mut_buffer_ptr(), - value.as_bytes().as_ptr(), - value.len(), - ); - writer.set_written(written); - } else { - let written = format_escaped_str_impl_sse2_128( - writer.as_mut_buffer_ptr(), - value.as_bytes().as_ptr(), - value.len(), - ); - writer.set_written(written); - }; - } -} - -#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))] +#[cfg(all( + not(target_arch = "x86_64"), + not(feature = "avx512"), + feature = "generic_simd" +))] #[inline(always)] fn format_escaped_str<W>(writer: &mut W, value: &str) where @@ -650,16 +638,17 @@ where unsafe { reserve_str!(writer, value); - let written = format_escaped_str_scalar( + let written = crate::serialize::writer::str::format_escaped_str_impl_generic_128( writer.as_mut_buffer_ptr(), value.as_bytes().as_ptr(), value.len(), ); + writer.set_written(written); } } -#[cfg(all(not(feature = "unstable-simd"), target_arch = "x86_64"))] +#[cfg(all(not(target_arch = "x86_64"), not(feature = "generic_simd")))] #[inline(always)] fn format_escaped_str<W>(writer: &mut W, value: &str) where @@ -668,7 +657,7 @@ where unsafe { reserve_str!(writer, value); - let written = format_escaped_str_impl_sse2_128( + let written = crate::serialize::writer::str::format_escaped_str_scalar( writer.as_mut_buffer_ptr(), value.as_bytes().as_ptr(), value.len(), diff --git a/src/serialize/writer/mod.rs b/src/serialize/writer/mod.rs index 3e945b2c..942e66b4 100644 --- a/src/serialize/writer/mod.rs +++ b/src/serialize/writer/mod.rs @@ -6,4 +6,4 @@ mod json; mod str; pub use byteswriter::{BytesWriter, WriteExt}; -pub use json::{to_writer, to_writer_pretty}; +pub use json::{set_str_formatter_fn, to_writer,
to_writer_pretty}; diff --git a/src/serialize/writer/str/avx512.rs b/src/serialize/writer/str/avx512.rs index 983d4a30..02b7d81e 100644 --- a/src/serialize/writer/str/avx512.rs +++ b/src/serialize/writer/str/avx512.rs @@ -1,114 +1,89 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use core::mem::transmute; - -use super::escape::QUOTE_TAB; - use core::arch::x86_64::{ - __m256i, _mm256_cmpeq_epu8_mask, _mm256_cmplt_epu8_mask, _mm256_load_si256, _mm256_loadu_si256, - _mm256_maskz_loadu_epi8, _mm256_storeu_epi8, + _mm256_cmpeq_epu8_mask, _mm256_cmplt_epu8_mask, _mm256_loadu_epi8, _mm256_maskz_loadu_epi8, + _mm256_set1_epi8, _mm256_storeu_epi8, }; -#[repr(C, align(32))] -struct ConstArray { - pub data: [u8; 32], +macro_rules! splat_mm256 { + ($val:expr) => { + _mm256_set1_epi8(core::mem::transmute::<u8, i8>($val)) + }; } -const BLASH: ConstArray = ConstArray { data: [b'\\'; 32] }; -const QUOTE: ConstArray = ConstArray { data: [b'"'; 32] }; -const X20: ConstArray = ConstArray { data: [32; 32] }; +#[inline(never)] +#[target_feature(enable = "avx512f,avx512bw,avx512vl,bmi2")] +pub unsafe fn format_escaped_str_impl_512vl( + odst: *mut u8, + value_ptr: *const u8, + value_len: usize, +) -> usize { + unsafe { + const STRIDE: usize = 32; -macro_rules! impl_format_simd_avx512vl { - ($dst:expr, $src:expr, $value_len:expr) => { - let mut nb: usize = $value_len; + let mut dst = odst; + let mut src = value_ptr; + let mut nb: usize = value_len; + + let blash = splat_mm256!(b'\\'); + let quote = splat_mm256!(b'"'); + let x20 = splat_mm256!(32); - let blash = _mm256_load_si256(BLASH.data.as_ptr() as *const __m256i); - let quote = _mm256_load_si256(QUOTE.data.as_ptr() as *const __m256i); - let x20 = _mm256_load_si256(X20.data.as_ptr() as *const __m256i); + core::ptr::write(dst, b'"'); + dst = dst.add(1); unsafe { while nb >= STRIDE { - let str_vec = _mm256_loadu_si256(transmute::<*const u8, *const __m256i>($src)); + let str_vec = _mm256_loadu_epi8(src.cast::<i8>()); - _mm256_storeu_epi8($dst as *mut i8, str_vec); + _mm256_storeu_epi8(dst.cast::<i8>(), str_vec); let mask = _mm256_cmpeq_epu8_mask(str_vec, blash) | _mm256_cmpeq_epu8_mask(str_vec, quote) | _mm256_cmplt_epu8_mask(str_vec, x20); - if unlikely!(mask > 0) { + if mask != 0 { let cn = trailing_zeros!(mask); - $src = $src.add(cn); - $dst = $dst.add(cn); + src = src.add(cn); + dst = dst.add(cn); nb -= cn; nb -= 1; - let escape = QUOTE_TAB[*($src) as usize]; - $src = $src.add(1); - - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); + write_escape!(*(src), dst); + src = src.add(1); } else { nb -= STRIDE; - $dst = $dst.add(STRIDE); - $src = $src.add(STRIDE); + dst = dst.add(STRIDE); + src = src.add(STRIDE); } } - if nb > 0 { - loop { - let remainder_mask = !(u32::MAX << nb); - let str_vec = _mm256_maskz_loadu_epi8(remainder_mask, $src as *const i8); - - _mm256_storeu_epi8($dst as *mut i8, str_vec); - - let mask = (_mm256_cmpeq_epu8_mask(str_vec, blash) - | _mm256_cmpeq_epu8_mask(str_vec, quote) - | _mm256_cmplt_epu8_mask(str_vec, x20)) - & remainder_mask; - - if unlikely!(mask > 0) { - let cn = trailing_zeros!(mask); - $src = $src.add(cn); - $dst = $dst.add(cn); - nb -= cn; - nb -= 1; - - let escape = QUOTE_TAB[*($src) as usize]; - $src = $src.add(1); - - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); - } else { - $dst = $dst.add(nb); - break; - } - } - } - } - }; -} + loop { + let remainder_mask = !(u32::MAX << nb); + let str_vec = _mm256_maskz_loadu_epi8(remainder_mask, src.cast::<i8>()); -#[inline(never)] -#[cfg_attr( - feature =
"avx512", - target_feature(enable = "avx512f,avx512bw,avx512vl,bmi2") -)] -pub unsafe fn format_escaped_str_impl_512vl( - odst: *mut u8, - value_ptr: *const u8, - value_len: usize, -) -> usize { - unsafe { - const STRIDE: usize = 32; + _mm256_storeu_epi8(dst.cast::(), str_vec); - let mut dst = odst; - let mut src = value_ptr; + let mask = (_mm256_cmpeq_epu8_mask(str_vec, blash) + | _mm256_cmpeq_epu8_mask(str_vec, quote) + | _mm256_cmplt_epu8_mask(str_vec, x20)) + & remainder_mask; - core::ptr::write(dst, b'"'); - dst = dst.add(1); + if mask != 0 { + let cn = trailing_zeros!(mask); + src = src.add(cn); + dst = dst.add(cn); + nb -= cn; + nb -= 1; - impl_format_simd_avx512vl!(dst, src, value_len); + write_escape!(*(src), dst); + src = src.add(1); + } else { + dst = dst.add(nb); + break; + } + } + } core::ptr::write(dst, b'"'); dst = dst.add(1); diff --git a/src/serialize/writer/str/escape.rs b/src/serialize/writer/str/escape.rs index b1c8e0e8..915d54fa 100644 --- a/src/serialize/writer/str/escape.rs +++ b/src/serialize/writer/str/escape.rs @@ -1,9 +1,25 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) // the constants and SIMD approach are adapted from cloudwego's sonic-rs +#[cfg(feature = "inline_int")] macro_rules! write_escape { - ($escape:expr, $dst:expr) => { - core::ptr::copy_nonoverlapping($escape.0.as_ptr(), $dst, 8); + ($byte:expr, $dst:expr) => { + debug_assert!($byte < 96); + let escape = u64::from_ne_bytes( + *crate::serialize::writer::str::escape::QUOTE_TAB.get_unchecked($byte as usize), + ); + core::ptr::write($dst.cast::(), escape); + $dst = $dst.add((escape as usize) >> 56); + }; +} + +#[cfg(not(feature = "inline_int"))] +macro_rules! write_escape { + ($byte:expr, $dst:expr) => { + debug_assert!($byte < 96); + let escape = crate::serialize::writer::str::escape::QUOTE_TAB.get_unchecked($byte as usize); + core::ptr::copy_nonoverlapping(escape.as_ptr(), $dst, 8); + $dst = $dst.add(((*escape.as_ptr().add(7)) as usize)); }; } @@ -18,101 +34,101 @@ pub const NEED_ESCAPED: [u8; 256] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; -pub const QUOTE_TAB: [([u8; 7], u8); 96] = [ - (*b"\\u0000\0", 6), - (*b"\\u0001\0", 6), - (*b"\\u0002\0", 6), - (*b"\\u0003\0", 6), - (*b"\\u0004\0", 6), - (*b"\\u0005\0", 6), - (*b"\\u0006\0", 6), - (*b"\\u0007\0", 6), - (*b"\\b\0\0\0\0\0", 2), - (*b"\\t\0\0\0\0\0", 2), - (*b"\\n\0\0\0\0\0", 2), - (*b"\\u000b\0", 6), - (*b"\\f\0\0\0\0\0", 2), - (*b"\\r\0\0\0\0\0", 2), - (*b"\\u000e\0", 6), - (*b"\\u000f\0", 6), - (*b"\\u0010\0", 6), - (*b"\\u0011\0", 6), - (*b"\\u0012\0", 6), - (*b"\\u0013\0", 6), - (*b"\\u0014\0", 6), - (*b"\\u0015\0", 6), - (*b"\\u0016\0", 6), - (*b"\\u0017\0", 6), - (*b"\\u0018\0", 6), - (*b"\\u0019\0", 6), - (*b"\\u001a\0", 6), - (*b"\\u001b\0", 6), - (*b"\\u001c\0", 6), - (*b"\\u001d\0", 6), - (*b"\\u001e\0", 6), - (*b"\\u001f\0", 6), - ([0; 7], 0), - ([0; 7], 0), - (*b"\\\"\0\0\0\0\0", 2), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 
0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), - (*b"\\\\\0\0\0\0\0", 2), - ([0; 7], 0), - ([0; 7], 0), - ([0; 7], 0), +pub const QUOTE_TAB: [[u8; 8]; 96] = [ + [b'\\', b'u', b'0', b'0', b'0', b'0', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'1', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'2', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'3', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'4', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'5', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'6', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'7', 0, 6], + [b'\\', b'b', b'0', b'0', b'0', b'0', 0, 2], + [b'\\', b't', b'0', b'0', b'0', b'0', 0, 2], + [b'\\', b'n', b'0', b'0', b'0', b'0', 0, 2], + [b'\\', b'u', b'0', b'0', b'0', b'b', 0, 6], + [b'\\', b'f', b'0', b'0', b'0', b'0', 0, 2], + [b'\\', b'r', b'0', b'0', b'0', b'0', 0, 2], + [b'\\', b'u', b'0', b'0', b'0', b'e', 0, 6], + [b'\\', b'u', b'0', b'0', b'0', b'f', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'0', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'1', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'2', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'3', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'4', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'5', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'6', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'7', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'8', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'9', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'a', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'b', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'c', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'd', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'e', 0, 6], + [b'\\', b'u', b'0', b'0', b'1', b'f', 0, 6], + [0; 8], + [0; 8], + [b'\\', b'"', 0, 0, 0, 0, 0, 2], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [0; 8], + [b'\\', b'\\', 0, 0, 0, 0, 0, 2], + [0; 8], + [0; 8], + [0; 8], ]; diff --git a/src/serialize/writer/str/generic.rs b/src/serialize/writer/str/generic.rs index e7973f2a..c4e60d10 100644 --- a/src/serialize/writer/str/generic.rs +++ b/src/serialize/writer/str/generic.rs @@ -1,46 +1,58 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use super::escape::{NEED_ESCAPED, QUOTE_TAB}; use core::simd::cmp::{SimdPartialEq, SimdPartialOrd}; -macro_rules! 
impl_format_simd_generic_128 { - ($dst:expr, $src:expr, $value_len:expr) => { - let last_stride_src = $src.add($value_len).sub(STRIDE); - let mut nb: usize = $value_len; +#[inline(never)] +#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))] +pub unsafe fn format_escaped_str_impl_generic_128( + odst: *mut u8, + value_ptr: *const u8, + value_len: usize, +) -> usize { + unsafe { + const STRIDE: usize = 16; + type StrVector = core::simd::u8x16; - assume!($value_len >= STRIDE); + let mut dst = odst; + let mut src = value_ptr; - const BLASH: StrVector = StrVector::from_array([b'\\'; STRIDE]); - const QUOTE: StrVector = StrVector::from_array([b'"'; STRIDE]); - const X20: StrVector = StrVector::from_array([32; STRIDE]); + core::ptr::write(dst, b'"'); + dst = dst.add(1); - unsafe { - { - while nb >= STRIDE { - let v = StrVector::from_slice(core::slice::from_raw_parts($src, STRIDE)); - let mask = - (v.simd_eq(BLASH) | v.simd_eq(QUOTE) | v.simd_lt(X20)).to_bitmask() as u32; - v.copy_to_slice(core::slice::from_raw_parts_mut($dst, STRIDE)); + if value_len < STRIDE { + impl_format_scalar!(dst, src, value_len); + } else { + let blash: StrVector = StrVector::splat(b'\\'); + let quote: StrVector = StrVector::splat(b'"'); + let x20: StrVector = StrVector::splat(32); - if unlikely!(mask > 0) { - let cn = trailing_zeros!(mask) as usize; - nb -= cn; - $dst = $dst.add(cn); - $src = $src.add(cn); - nb -= 1; - let escape = QUOTE_TAB[*($src) as usize]; - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); - $src = $src.add(1); - } else { - nb -= STRIDE; - $dst = $dst.add(STRIDE); - $src = $src.add(STRIDE); + let last_stride_src = src.add(value_len).sub(STRIDE); + let mut nb: usize = value_len; + + unsafe { + { + while nb >= STRIDE { + let v = StrVector::from_slice(core::slice::from_raw_parts(src, STRIDE)); + let mask = (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)) + .to_bitmask() as u32; + v.copy_to_slice(core::slice::from_raw_parts_mut(dst, STRIDE)); + + if mask != 0 { + let cn = trailing_zeros!(mask) as usize; + nb -= cn; + dst = dst.add(cn); + src = src.add(cn); + nb -= 1; + write_escape!(*(src), dst); + src = src.add(1); + } else { + nb -= STRIDE; + dst = dst.add(STRIDE); + src = src.add(STRIDE); + } } } - } - if nb > 0 { let mut scratch: [u8; 32] = [b'a'; 32]; let mut v = StrVector::from_slice(core::slice::from_raw_parts(last_stride_src, STRIDE)); @@ -52,56 +64,27 @@ macro_rules! 
impl_format_simd_generic_128 { let mut scratch_ptr = scratch.as_mut_ptr().add(16 - nb); v = StrVector::from_slice(core::slice::from_raw_parts(scratch_ptr, STRIDE)); let mut mask = - (v.simd_eq(BLASH) | v.simd_eq(QUOTE) | v.simd_lt(X20)).to_bitmask() as u32; + (v.simd_eq(blash) | v.simd_eq(quote) | v.simd_lt(x20)).to_bitmask() as u32; - while nb > 0 { - v.copy_to_slice(core::slice::from_raw_parts_mut($dst, STRIDE)); - if unlikely!(mask > 0) { + loop { + v.copy_to_slice(core::slice::from_raw_parts_mut(dst, STRIDE)); + if mask != 0 { let cn = trailing_zeros!(mask) as usize; nb -= cn; - $dst = $dst.add(cn); + dst = dst.add(cn); scratch_ptr = scratch_ptr.add(cn); nb -= 1; mask >>= cn + 1; - let escape = QUOTE_TAB[*(scratch_ptr) as usize]; - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); + write_escape!(*(scratch_ptr), dst); scratch_ptr = scratch_ptr.add(1); v = StrVector::from_slice(core::slice::from_raw_parts(scratch_ptr, STRIDE)); } else { - $dst = $dst.add(nb); + dst = dst.add(nb); break; } } } } - }; -} - -#[allow(dead_code)] -#[inline(never)] -#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse2,bmi1"))] -#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))] -pub unsafe fn format_escaped_str_impl_generic_128( - odst: *mut u8, - value_ptr: *const u8, - value_len: usize, -) -> usize { - unsafe { - const STRIDE: usize = 16; - type StrVector = core::simd::u8x16; - - let mut dst = odst; - let mut src = value_ptr; - - core::ptr::write(dst, b'"'); - dst = dst.add(1); - - if value_len < STRIDE { - impl_format_scalar!(dst, src, value_len) - } else { - impl_format_simd_generic_128!(dst, src, value_len); - } core::ptr::write(dst, b'"'); dst = dst.add(1); diff --git a/src/serialize/writer/str/mod.rs b/src/serialize/writer/str/mod.rs index 6b18f066..81d49af8 100644 --- a/src/serialize/writer/str/mod.rs +++ b/src/serialize/writer/str/mod.rs @@ -5,25 +5,24 @@ mod escape; #[macro_use] mod scalar; +#[cfg(all(feature = "generic_simd", not(target_arch = "x86_64")))] +mod generic; + #[cfg(target_arch = "x86_64")] mod sse2; -#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))] +#[cfg(all(target_arch = "x86_64", feature = "avx512"))] mod avx512; -#[cfg(feature = "unstable-simd")] -mod generic; - -#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))] +#[cfg(all(not(target_arch = "x86_64"), not(feature = "generic_simd")))] pub use scalar::format_escaped_str_scalar; -#[allow(unused_imports)] -#[cfg(feature = "unstable-simd")] -pub use generic::format_escaped_str_impl_generic_128; - -#[cfg(all(feature = "unstable-simd", target_arch = "x86_64", feature = "avx512"))] +#[cfg(all(target_arch = "x86_64", feature = "avx512"))] pub use avx512::format_escaped_str_impl_512vl; #[allow(unused_imports)] #[cfg(target_arch = "x86_64")] pub use sse2::format_escaped_str_impl_sse2_128; + +#[cfg(all(feature = "generic_simd", not(target_arch = "x86_64")))] +pub use generic::format_escaped_str_impl_generic_128; diff --git a/src/serialize/writer/str/scalar.rs b/src/serialize/writer/str/scalar.rs index 29150809..75cfc9c0 100644 --- a/src/serialize/writer/str/scalar.rs +++ b/src/serialize/writer/str/scalar.rs @@ -1,8 +1,5 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))] -use super::escape::{NEED_ESCAPED, QUOTE_TAB}; - macro_rules! impl_format_scalar { ($dst:expr, $src:expr, $value_len:expr) => { unsafe { @@ -10,17 +7,16 @@ macro_rules! 
impl_format_scalar { core::ptr::write($dst, *($src)); $src = $src.add(1); $dst = $dst.add(1); - if unlikely!(NEED_ESCAPED[*($src.sub(1)) as usize] > 0) { - let escape = QUOTE_TAB[*($src.sub(1)) as usize]; - write_escape!(escape, $dst.sub(1)); - $dst = $dst.add(escape.1 as usize - 1); + if *super::escape::NEED_ESCAPED.get_unchecked(*($src.sub(1)) as usize) != 0 { + $dst = $dst.sub(1); + write_escape!(*($src.sub(1)), $dst); } } } }; } -#[cfg(all(not(feature = "unstable-simd"), not(target_arch = "x86_64")))] +#[cfg(all(not(target_arch = "x86_64"), not(feature = "generic_simd")))] pub unsafe fn format_escaped_str_scalar( odst: *mut u8, value_ptr: *const u8, diff --git a/src/serialize/writer/str/sse2.rs b/src/serialize/writer/str/sse2.rs index 43def30a..2150d546 100644 --- a/src/serialize/writer/str/sse2.rs +++ b/src/serialize/writer/str/sse2.rs @@ -1,9 +1,5 @@ // SPDX-License-Identifier: Apache-2.0 -use super::escape::{NEED_ESCAPED, QUOTE_TAB}; - -use core::mem::transmute; - use core::arch::x86_64::{ __m128i, _mm_cmpeq_epi8, _mm_loadu_si128, _mm_movemask_epi8, _mm_or_si128, _mm_set1_epi8, _mm_setzero_si128, _mm_storeu_si128, _mm_subs_epu8, @@ -11,57 +7,70 @@ use core::arch::x86_64::{ macro_rules! splat_mm128 { ($val:expr) => { - _mm_set1_epi8(transmute::($val)) + _mm_set1_epi8(core::mem::transmute::($val)) }; } -macro_rules! impl_format_simd_sse2_128 { - ($dst:expr, $src:expr, $value_len:expr) => { - let last_stride_src = $src.add($value_len).sub(STRIDE); - let mut nb: usize = $value_len; +#[allow(dead_code)] +#[expect(clippy::cast_ptr_alignment)] +#[inline(never)] +pub unsafe fn format_escaped_str_impl_sse2_128( + odst: *mut u8, + value_ptr: *const u8, + value_len: usize, +) -> usize { + unsafe { + const STRIDE: usize = 16; - assume!($value_len >= STRIDE); + let mut dst = odst; + let mut src = value_ptr; - let blash = splat_mm128!(b'\\'); - let quote = splat_mm128!(b'"'); - let x20 = splat_mm128!(31); - let v0 = _mm_setzero_si128(); + core::ptr::write(dst, b'"'); + dst = dst.add(1); - unsafe { - while nb >= STRIDE { - let str_vec = _mm_loadu_si128($src as *const __m128i); + if value_len < STRIDE { + impl_format_scalar!(dst, src, value_len); + } else { + let blash = splat_mm128!(b'\\'); + let quote = splat_mm128!(b'"'); + let x20 = splat_mm128!(31); + let v0 = _mm_setzero_si128(); - let mask = _mm_movemask_epi8(_mm_or_si128( - _mm_or_si128( - _mm_cmpeq_epi8(str_vec, blash), - _mm_cmpeq_epi8(str_vec, quote), - ), - _mm_cmpeq_epi8(_mm_subs_epu8(str_vec, x20), v0), - )) as u32; - - _mm_storeu_si128($dst as *mut __m128i, str_vec); - - if unlikely!(mask > 0) { - let cn = trailing_zeros!(mask) as usize; - nb -= cn; - $dst = $dst.add(cn); - $src = $src.add(cn); - nb -= 1; - let escape = QUOTE_TAB[*($src) as usize]; - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); - $src = $src.add(1); - } else { - nb -= STRIDE; - $dst = $dst.add(STRIDE); - $src = $src.add(STRIDE); + let last_stride_src = src.add(value_len).sub(STRIDE); + let mut nb: usize = value_len; + + unsafe { + while nb >= STRIDE { + let str_vec = _mm_loadu_si128(src.cast::<__m128i>()); + + let mask = _mm_movemask_epi8(_mm_or_si128( + _mm_or_si128( + _mm_cmpeq_epi8(str_vec, blash), + _mm_cmpeq_epi8(str_vec, quote), + ), + _mm_cmpeq_epi8(_mm_subs_epu8(str_vec, x20), v0), + )); + + _mm_storeu_si128(dst.cast::<__m128i>(), str_vec); + + if mask != 0 { + let cn = trailing_zeros!(mask) as usize; + nb -= cn; + dst = dst.add(cn); + src = src.add(cn); + nb -= 1; + write_escape!(*(src), dst); + src = src.add(1); + } else { + nb -= 
STRIDE; + dst = dst.add(STRIDE); + src = src.add(STRIDE); + } } - } - if nb > 0 { let mut scratch: [u8; 32] = [b'a'; 32]; - let mut str_vec = _mm_loadu_si128(last_stride_src as *const __m128i); - _mm_storeu_si128(scratch.as_mut_ptr() as *mut __m128i, str_vec); + let mut str_vec = _mm_loadu_si128(last_stride_src.cast::<__m128i>()); + _mm_storeu_si128(scratch.as_mut_ptr().cast::<__m128i>(), str_vec); let mut scratch_ptr = scratch.as_mut_ptr().add(16 - nb); str_vec = _mm_loadu_si128(scratch_ptr as *const __m128i); @@ -72,54 +81,28 @@ macro_rules! impl_format_simd_sse2_128 { _mm_cmpeq_epi8(str_vec, quote), ), _mm_cmpeq_epi8(_mm_subs_epu8(str_vec, x20), v0), - )) as u32; + )); - while nb > 0 { - _mm_storeu_si128($dst as *mut __m128i, str_vec); + loop { + _mm_storeu_si128(dst.cast::<__m128i>(), str_vec); - if unlikely!(mask > 0) { + if mask != 0 { let cn = trailing_zeros!(mask) as usize; nb -= cn; - $dst = $dst.add(cn); + dst = dst.add(cn); scratch_ptr = scratch_ptr.add(cn); nb -= 1; mask >>= cn + 1; - let escape = QUOTE_TAB[*(scratch_ptr) as usize]; - write_escape!(escape, $dst); - $dst = $dst.add(escape.1 as usize); + write_escape!(*(scratch_ptr), dst); scratch_ptr = scratch_ptr.add(1); str_vec = _mm_loadu_si128(scratch_ptr as *const __m128i); } else { - $dst = $dst.add(nb); + dst = dst.add(nb); break; } } } } - }; -} - -#[allow(dead_code)] -#[inline(never)] -pub unsafe fn format_escaped_str_impl_sse2_128( - odst: *mut u8, - value_ptr: *const u8, - value_len: usize, -) -> usize { - unsafe { - const STRIDE: usize = 16; - - let mut dst = odst; - let mut src = value_ptr; - - core::ptr::write(dst, b'"'); - dst = dst.add(1); - - if value_len < STRIDE { - impl_format_scalar!(dst, src, value_len) - } else { - impl_format_simd_sse2_128!(dst, src, value_len); - } core::ptr::write(dst, b'"'); dst = dst.add(1); diff --git a/src/str/avx512.rs b/src/str/avx512.rs index 230a2de1..c52dc21d 100644 --- a/src/str/avx512.rs +++ b/src/str/avx512.rs @@ -3,7 +3,7 @@ use crate::str::pyunicode_new::*; use core::arch::x86_64::{ - __m256i, _mm256_and_si256, _mm256_cmpgt_epu8_mask, _mm256_cmpneq_epi8_mask, _mm256_loadu_si256, + _mm256_and_si256, _mm256_cmpgt_epu8_mask, _mm256_cmpneq_epi8_mask, _mm256_loadu_epi8, _mm256_mask_cmpneq_epi8_mask, _mm256_maskz_loadu_epi8, _mm256_max_epu8, _mm256_set1_epi8, }; @@ -25,13 +25,13 @@ macro_rules! impl_kind_simd_avx512vl { let remainder_mask: u32 = !(u32::MAX << remainder); let mut str_vec = - _mm256_maskz_loadu_epi8(remainder_mask, $buf.as_bytes().as_ptr() as *const i8); + _mm256_maskz_loadu_epi8(remainder_mask, $buf.as_bytes().as_ptr().cast::()); let sptr = $buf.as_bytes().as_ptr().add(remainder); for i in 0..num_loops { str_vec = _mm256_max_epu8( str_vec, - _mm256_loadu_si256(sptr.add(STRIDE * i) as *const __m256i), + _mm256_loadu_epi8(sptr.add(STRIDE * i).cast::()), ); } @@ -49,7 +49,7 @@ macro_rules! impl_kind_simd_avx512vl { _mm256_and_si256( _mm256_maskz_loadu_epi8( remainder_mask, - $buf.as_bytes().as_ptr() as *const i8 + $buf.as_bytes().as_ptr().cast::() ), multibyte ), @@ -59,7 +59,7 @@ macro_rules! impl_kind_simd_avx512vl { for i in 0..num_loops { num_chars += popcnt!(_mm256_cmpneq_epi8_mask( _mm256_and_si256( - _mm256_loadu_si256(sptr.add(STRIDE * i) as *const __m256i), + _mm256_loadu_epi8(sptr.add(STRIDE * i).cast::()), multibyte ), vec_128, @@ -79,10 +79,7 @@ macro_rules! 
impl_kind_simd_avx512vl { } #[inline(never)] -#[cfg_attr( - feature = "avx512", - target_feature(enable = "avx512f,avx512bw,avx512vl,bmi2") -)] +#[target_feature(enable = "avx512f,avx512bw,avx512vl,bmi2")] pub unsafe fn create_str_impl_avx512vl(buf: &str) -> *mut pyo3_ffi::PyObject { impl_kind_simd_avx512vl!(buf) } @@ -93,10 +90,18 @@ pub fn unicode_from_str(buf: &str) -> *mut pyo3_ffi::PyObject { if unlikely!(buf.is_empty()) { return use_immortal!(crate::typeref::EMPTY_UNICODE); } + STR_CREATE_FN(buf) + } +} + +pub type StrDeserializer = unsafe fn(&str) -> *mut pyo3_ffi::PyObject; + +static mut STR_CREATE_FN: StrDeserializer = super::scalar::str_impl_kind_scalar; + +pub fn set_str_create_fn() { + unsafe { if std::is_x86_feature_detected!("avx512vl") { - create_str_impl_avx512vl(buf) - } else { - super::scalar::unicode_from_str(buf) + STR_CREATE_FN = create_str_impl_avx512vl; } } } diff --git a/src/str/ffi.rs b/src/str/ffi.rs index 21841a88..4f1c76bd 100644 --- a/src/str/ffi.rs +++ b/src/str/ffi.rs @@ -1,7 +1,8 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) +use crate::util::isize_to_usize; use core::ffi::c_void; -use pyo3_ffi::*; +use pyo3_ffi::{PyASCIIObject, PyCompactUnicodeObject, PyObject, Py_hash_t}; // see unicodeobject.h for documentation @@ -11,12 +12,14 @@ pub fn hash_str(op: *mut PyObject) -> Py_hash_t { let data_ptr: *mut c_void = if (*op.cast::<PyASCIIObject>()).compact() == 1 && (*op.cast::<PyASCIIObject>()).ascii() == 1 { - (op as *mut PyASCIIObject).offset(1) as *mut c_void + op.cast::<PyASCIIObject>().offset(1).cast::<c_void>() } else { - (op as *mut PyCompactUnicodeObject).offset(1) as *mut c_void + op.cast::<PyCompactUnicodeObject>() + .offset(1) + .cast::<c_void>() }; let num_bytes = - (*(op as *mut PyASCIIObject)).length * ((*(op as *mut PyASCIIObject)).kind()) as isize; + (*op.cast::<PyASCIIObject>()).length * ((*op.cast::<PyASCIIObject>()).kind()) as isize; #[cfg(Py_3_14)] let hash = pyo3_ffi::Py_HashBuffer(data_ptr, num_bytes); #[cfg(not(Py_3_14))] @@ -29,11 +32,11 @@ pub fn hash_str(op: *mut PyObject) -> Py_hash_t { #[inline(never)] pub fn unicode_to_str_via_ffi(op: *mut PyObject) -> Option<&'static str> { let mut str_size: pyo3_ffi::Py_ssize_t = 0; - let ptr = ffi!(PyUnicode_AsUTF8AndSize(op, &mut str_size)) as *const u8; + let ptr = ffi!(PyUnicode_AsUTF8AndSize(op, &mut str_size)).cast::<u8>(); if unlikely!(ptr.is_null()) { None } else { - Some(str_from_slice!(ptr, str_size as usize)) + Some(str_from_slice!(ptr, isize_to_usize(str_size))) } } @@ -44,11 +47,11 @@ pub fn unicode_to_str(op: *mut PyObject) -> Option<&'static str> { unicode_to_str_via_ffi(op) } else if (*op.cast::<PyASCIIObject>()).ascii() == 1 { let ptr = op.cast::<PyASCIIObject>().offset(1) as *const u8; - let len = (*op.cast::<PyASCIIObject>()).length as usize; + let len = isize_to_usize((*op.cast::<PyASCIIObject>()).length); Some(str_from_slice!(ptr, len)) } else if (*op.cast::<PyCompactUnicodeObject>()).utf8_length != 0 { let ptr = (*op.cast::<PyCompactUnicodeObject>()).utf8 as *const u8; - let len = (*op.cast::<PyCompactUnicodeObject>()).utf8_length as usize; + let len = isize_to_usize((*op.cast::<PyCompactUnicodeObject>()).utf8_length); Some(str_from_slice!(ptr, len)) } else { unicode_to_str_via_ffi(op) diff --git a/src/str/mod.rs b/src/str/mod.rs index 75149e9b..c71ce296 100644 --- a/src/str/mod.rs +++ b/src/str/mod.rs @@ -7,9 +7,9 @@ mod pyunicode_new; mod scalar; #[cfg(not(feature = "avx512"))] -pub use scalar::unicode_from_str; +pub use scalar::{set_str_create_fn, unicode_from_str}; #[cfg(feature = "avx512")] -pub use avx512::unicode_from_str; +pub use avx512::{set_str_create_fn, unicode_from_str}; pub use ffi::*; diff --git a/src/str/pyunicode_new.rs b/src/str/pyunicode_new.rs index b5496891..7b1c2df2 100644 --- a/src/str/pyunicode_new.rs +++
b/src/str/pyunicode_new.rs @@ -1,15 +1,30 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use pyo3_ffi::{PyASCIIObject, PyCompactUnicodeObject}; +use crate::util::usize_to_isize; +use pyo3_ffi::{PyASCIIObject, PyCompactUnicodeObject, PyObject}; + +macro_rules! validate_str { + ($ptr:expr) => { + #[cfg(not(Py_3_12))] + debug_assert!((*($ptr.cast::<PyASCIIObject>())).ready() == 1); + + debug_assert!((*($ptr.cast::<PyASCIIObject>())).compact() == 1); + debug_assert!((*($ptr.cast::<PyASCIIObject>())).interned() == 0); + + debug_assert!(ffi!(_PyUnicode_CheckConsistency($ptr.cast::<PyObject>(), 1)) == 1); + }; +} #[inline(never)] pub fn pyunicode_ascii(buf: *const u8, num_chars: usize) -> *mut pyo3_ffi::PyObject { unsafe { - let ptr = ffi!(PyUnicode_New(num_chars as isize, 127)); - let data_ptr = ptr.cast::<PyASCIIObject>().offset(1) as *mut u8; + let ptr = ffi!(PyUnicode_New(usize_to_isize(num_chars), 127)); + let data_ptr = ptr.cast::<PyASCIIObject>().offset(1).cast::<u8>(); core::ptr::copy_nonoverlapping(buf, data_ptr, num_chars); core::ptr::write(data_ptr.add(num_chars), 0); - ptr + debug_assert!((*(ptr.cast::<PyASCIIObject>())).ascii() == 1); + validate_str!(ptr); + ptr.cast::<PyObject>() } } @@ -17,41 +32,44 @@ pub fn pyunicode_ascii(buf: *const u8, num_chars: usize) -> *mut pyo3_ffi::PyObj #[inline(never)] pub fn pyunicode_onebyte(buf: &str, num_chars: usize) -> *mut pyo3_ffi::PyObject { unsafe { - let ptr = ffi!(PyUnicode_New(num_chars as isize, 255)); - let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u8; + let ptr = ffi!(PyUnicode_New(usize_to_isize(num_chars), 255)); + let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1).cast::<u8>(); for each in buf.chars().fuse() { core::ptr::write(data_ptr, each as u8); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - ptr + validate_str!(ptr); + ptr.cast::<PyObject>() } } #[inline(never)] pub fn pyunicode_twobyte(buf: &str, num_chars: usize) -> *mut pyo3_ffi::PyObject { unsafe { - let ptr = ffi!(PyUnicode_New(num_chars as isize, 65535)); - let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u16; + let ptr = ffi!(PyUnicode_New(usize_to_isize(num_chars), 65535)); + let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1).cast::<u16>(); for each in buf.chars().fuse() { core::ptr::write(data_ptr, each as u16); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - ptr + validate_str!(ptr); + ptr.cast::<PyObject>() } } #[inline(never)] pub fn pyunicode_fourbyte(buf: &str, num_chars: usize) -> *mut pyo3_ffi::PyObject { unsafe { - let ptr = ffi!(PyUnicode_New(num_chars as isize, 1114111)); - let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1) as *mut u32; + let ptr = ffi!(PyUnicode_New(usize_to_isize(num_chars), 1114111)); + let mut data_ptr = ptr.cast::<PyCompactUnicodeObject>().offset(1).cast::<u32>(); for each in buf.chars().fuse() { core::ptr::write(data_ptr, each as u32); data_ptr = data_ptr.offset(1); } core::ptr::write(data_ptr, 0); - ptr + validate_str!(ptr); + ptr.cast::<PyObject>() } } diff --git a/src/str/scalar.rs b/src/str/scalar.rs index 0aa84c7d..12adb634 100644 --- a/src/str/scalar.rs +++ b/src/str/scalar.rs @@ -1,10 +1,15 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) -use crate::str::pyunicode_new::*; -use crate::typeref::EMPTY_UNICODE; +use crate::str::pyunicode_new::{ + pyunicode_ascii, pyunicode_fourbyte, pyunicode_onebyte, pyunicode_twobyte, +}; -#[inline(always)] -pub fn str_impl_kind_scalar(buf: &str, num_chars: usize) -> *mut pyo3_ffi::PyObject { +#[inline(never)] +pub fn str_impl_kind_scalar(buf: &str) -> *mut pyo3_ffi::PyObject { + let num_chars = bytecount::num_chars(buf.as_bytes()); + if buf.len() == num_chars { + return pyunicode_ascii(buf.as_ptr(), num_chars); + } unsafe { let len = buf.len();
assume!(len > 0); @@ -31,15 +36,14 @@ pub fn str_impl_kind_scalar(buf: &str, num_chars: usize) -> *mut pyo3_ffi::PyObj } } -#[inline(never)] +#[cfg(not(feature = "avx512"))] +#[inline(always)] pub fn unicode_from_str(buf: &str) -> *mut pyo3_ffi::PyObject { if unlikely!(buf.is_empty()) { - return use_immortal!(EMPTY_UNICODE); - } - let num_chars = bytecount::num_chars(buf.as_bytes()); - if buf.len() == num_chars { - pyunicode_ascii(buf.as_ptr(), num_chars) - } else { - str_impl_kind_scalar(buf, num_chars) + return use_immortal!(crate::typeref::EMPTY_UNICODE); } + str_impl_kind_scalar(buf) } + +#[cfg(not(feature = "avx512"))] +pub fn set_str_create_fn() {} diff --git a/src/typeref.rs b/src/typeref.rs index f9c15e37..bf925a4d 100644 --- a/src/typeref.rs +++ b/src/typeref.rs @@ -1,16 +1,17 @@ // SPDX-License-Identifier: (Apache-2.0 OR MIT) use crate::ffi::orjson_fragmenttype_new; -use core::ffi::c_char; #[cfg(feature = "yyjson")] use core::ffi::c_void; +use core::ffi::CStr; #[cfg(feature = "yyjson")] use core::mem::MaybeUninit; use core::ptr::{null_mut, NonNull}; use once_cell::race::{OnceBool, OnceBox}; -use pyo3_ffi::*; + #[cfg(feature = "yyjson")] -use std::cell::UnsafeCell; +use core::cell::UnsafeCell; +use pyo3_ffi::*; pub struct NumpyTypes { pub array: *mut PyTypeObject, @@ -58,7 +59,6 @@ pub static mut FRAGMENT_TYPE: *mut PyTypeObject = null_mut(); pub static mut NUMPY_TYPES: OnceBox>> = OnceBox::new(); -#[cfg(Py_3_9)] pub static mut ZONEINFO_TYPE: *mut PyTypeObject = null_mut(); pub static mut UTCOFFSET_METHOD_STR: *mut PyObject = null_mut(); @@ -97,7 +97,7 @@ pub fn yyjson_init() -> Box { // Using unsafe to ensure allocation happens on the heap without going through the stack // so we don't stack overflow in debug mode. Once rust-lang/rust#63291 is stable (Box::new_uninit) // we can use that instead. 
- let layout = std::alloc::Layout::new::(); + let layout = core::alloc::Layout::new::(); let buffer = unsafe { Box::from_raw(std::alloc::alloc(layout).cast::()) }; let mut alloc = crate::ffi::yyjson::yyjson_alc { malloc: None, @@ -135,66 +135,60 @@ pub fn init_typerefs() { #[cfg_attr(feature = "optimize", optimize(size))] fn _init_typerefs_impl() -> bool { unsafe { - debug_assert!(crate::opt::MAX_OPT < u16::MAX as i32); + debug_assert!(crate::opt::MAX_OPT < i32::from(u16::MAX)); assert!(crate::deserialize::KEY_MAP .set(crate::deserialize::KeyMap::default()) .is_ok()); - FRAGMENT_TYPE = orjson_fragmenttype_new(); - PyDateTime_IMPORT(); + + crate::serialize::writer::set_str_formatter_fn(); + crate::str::set_str_create_fn(); + NONE = Py_None(); TRUE = Py_True(); FALSE = Py_False(); EMPTY_UNICODE = PyUnicode_New(0, 255); - STR_TYPE = (*EMPTY_UNICODE).ob_type; - BYTES_TYPE = (*PyBytes_FromStringAndSize("".as_ptr() as *const c_char, 0)).ob_type; - { - let bytearray = PyByteArray_FromStringAndSize("".as_ptr() as *const c_char, 0); - BYTEARRAY_TYPE = (*bytearray).ob_type; + STR_TYPE = &raw mut PyUnicode_Type; + BYTES_TYPE = &raw mut PyBytes_Type; + DICT_TYPE = &raw mut PyDict_Type; + LIST_TYPE = &raw mut PyList_Type; + TUPLE_TYPE = &raw mut PyTuple_Type; + NONE_TYPE = (*NONE).ob_type; + BOOL_TYPE = &raw mut PyBool_Type; + INT_TYPE = &raw mut PyLong_Type; + FLOAT_TYPE = &raw mut PyFloat_Type; + BYTEARRAY_TYPE = &raw mut PyByteArray_Type; + MEMORYVIEW_TYPE = &raw mut PyMemoryView_Type; - let memoryview = PyMemoryView_FromObject(bytearray); - MEMORYVIEW_TYPE = (*memoryview).ob_type; - Py_DECREF(memoryview); - Py_DECREF(bytearray); - } + PyDateTime_IMPORT(); - DICT_TYPE = (*PyDict_New()).ob_type; - LIST_TYPE = (*PyList_New(0)).ob_type; - TUPLE_TYPE = (*PyTuple_New(0)).ob_type; - NONE_TYPE = (*NONE).ob_type; - BOOL_TYPE = (*TRUE).ob_type; - INT_TYPE = (*PyLong_FromLongLong(0)).ob_type; - FLOAT_TYPE = (*PyFloat_FromDouble(0.0)).ob_type; DATETIME_TYPE = look_up_datetime_type(); DATE_TYPE = look_up_date_type(); TIME_TYPE = look_up_time_type(); UUID_TYPE = look_up_uuid_type(); ENUM_TYPE = look_up_enum_type(); - FIELD_TYPE = look_up_field_type(); - #[cfg(Py_3_9)] - { - ZONEINFO_TYPE = look_up_zoneinfo_type(); - } + FRAGMENT_TYPE = orjson_fragmenttype_new(); - INT_ATTR_STR = PyUnicode_InternFromString("int\0".as_ptr() as *const c_char); - UTCOFFSET_METHOD_STR = PyUnicode_InternFromString("utcoffset\0".as_ptr() as *const c_char); - NORMALIZE_METHOD_STR = PyUnicode_InternFromString("normalize\0".as_ptr() as *const c_char); - CONVERT_METHOD_STR = PyUnicode_InternFromString("convert\0".as_ptr() as *const c_char); - DST_STR = PyUnicode_InternFromString("dst\0".as_ptr() as *const c_char); - DICT_STR = PyUnicode_InternFromString("__dict__\0".as_ptr() as *const c_char); - DATACLASS_FIELDS_STR = - PyUnicode_InternFromString("__dataclass_fields__\0".as_ptr() as *const c_char); - SLOTS_STR = PyUnicode_InternFromString("__slots__\0".as_ptr() as *const c_char); - FIELD_TYPE_STR = PyUnicode_InternFromString("_field_type\0".as_ptr() as *const c_char); - ARRAY_STRUCT_STR = - PyUnicode_InternFromString("__array_struct__\0".as_ptr() as *const c_char); - DTYPE_STR = PyUnicode_InternFromString("dtype\0".as_ptr() as *const c_char); - DESCR_STR = PyUnicode_InternFromString("descr\0".as_ptr() as *const c_char); - VALUE_STR = PyUnicode_InternFromString("value\0".as_ptr() as *const c_char); - DEFAULT = PyUnicode_InternFromString("default\0".as_ptr() as *const c_char); - OPTION = PyUnicode_InternFromString("option\0".as_ptr() as 
*const c_char); + FIELD_TYPE = look_up_field_type(); + ZONEINFO_TYPE = look_up_zoneinfo_type(); + + INT_ATTR_STR = PyUnicode_InternFromString(c"int".as_ptr()); + UTCOFFSET_METHOD_STR = PyUnicode_InternFromString(c"utcoffset".as_ptr()); + NORMALIZE_METHOD_STR = PyUnicode_InternFromString(c"normalize".as_ptr()); + CONVERT_METHOD_STR = PyUnicode_InternFromString(c"convert".as_ptr()); + DST_STR = PyUnicode_InternFromString(c"dst".as_ptr()); + DICT_STR = PyUnicode_InternFromString(c"__dict__".as_ptr()); + DATACLASS_FIELDS_STR = PyUnicode_InternFromString(c"__dataclass_fields__".as_ptr()); + SLOTS_STR = PyUnicode_InternFromString(c"__slots__".as_ptr()); + FIELD_TYPE_STR = PyUnicode_InternFromString(c"_field_type".as_ptr()); + ARRAY_STRUCT_STR = PyUnicode_InternFromString(c"__array_struct__".as_ptr()); + DTYPE_STR = PyUnicode_InternFromString(c"dtype".as_ptr()); + DESCR_STR = PyUnicode_InternFromString(c"descr".as_ptr()); + VALUE_STR = PyUnicode_InternFromString(c"value".as_ptr()); + DEFAULT = PyUnicode_InternFromString(c"default".as_ptr()); + OPTION = PyUnicode_InternFromString(c"option".as_ptr()); JsonEncodeError = pyo3_ffi::PyExc_TypeError; Py_INCREF(JsonEncodeError); JsonDecodeError = look_up_json_exc(); @@ -206,15 +200,10 @@ fn _init_typerefs_impl() -> bool { #[cfg_attr(feature = "optimize", optimize(size))] unsafe fn look_up_json_exc() -> *mut PyObject { unsafe { - let module = PyImport_ImportModule("json\0".as_ptr() as *const c_char); + let module = PyImport_ImportModule(c"json".as_ptr()); let module_dict = PyObject_GenericGetDict(module, null_mut()); - let ptr = - PyMapping_GetItemString(module_dict, "JSONDecodeError\0".as_ptr() as *const c_char); - let res = pyo3_ffi::PyErr_NewException( - "orjson.JSONDecodeError\0".as_ptr() as *const c_char, - ptr, - null_mut(), - ); + let ptr = PyMapping_GetItemString(module_dict, c"JSONDecodeError".as_ptr()); + let res = pyo3_ffi::PyErr_NewException(c"orjson.JSONDecodeError".as_ptr(), ptr, null_mut()); Py_DECREF(ptr); Py_DECREF(module_dict); Py_DECREF(module); @@ -225,11 +214,14 @@ unsafe fn look_up_json_exc() -> *mut PyObject { #[cold] #[cfg_attr(feature = "optimize", optimize(size))] -unsafe fn look_up_numpy_type(numpy_module_dict: *mut PyObject, np_type: &str) -> *mut PyTypeObject { +unsafe fn look_up_numpy_type( + numpy_module_dict: *mut PyObject, + np_type: &CStr, +) -> *mut PyTypeObject { unsafe { - let ptr = PyMapping_GetItemString(numpy_module_dict, np_type.as_ptr() as *const c_char); + let ptr = PyMapping_GetItemString(numpy_module_dict, np_type.as_ptr()); Py_XDECREF(ptr); - ptr as *mut PyTypeObject + ptr.cast::() } } @@ -237,27 +229,27 @@ unsafe fn look_up_numpy_type(numpy_module_dict: *mut PyObject, np_type: &str) -> #[cfg_attr(feature = "optimize", optimize(size))] pub fn load_numpy_types() -> Box>> { unsafe { - let numpy = PyImport_ImportModule("numpy\0".as_ptr() as *const c_char); + let numpy = PyImport_ImportModule(c"numpy".as_ptr()); if numpy.is_null() { PyErr_Clear(); return Box::new(None); } let numpy_module_dict = PyObject_GenericGetDict(numpy, null_mut()); let types = Box::new(NumpyTypes { - array: look_up_numpy_type(numpy_module_dict, "ndarray\0"), - float16: look_up_numpy_type(numpy_module_dict, "half\0"), - float32: look_up_numpy_type(numpy_module_dict, "float32\0"), - float64: look_up_numpy_type(numpy_module_dict, "float64\0"), - int8: look_up_numpy_type(numpy_module_dict, "int8\0"), - int16: look_up_numpy_type(numpy_module_dict, "int16\0"), - int32: look_up_numpy_type(numpy_module_dict, "int32\0"), - int64: 
look_up_numpy_type(numpy_module_dict, "int64\0"), - uint16: look_up_numpy_type(numpy_module_dict, "uint16\0"), - uint32: look_up_numpy_type(numpy_module_dict, "uint32\0"), - uint64: look_up_numpy_type(numpy_module_dict, "uint64\0"), - uint8: look_up_numpy_type(numpy_module_dict, "uint8\0"), - bool_: look_up_numpy_type(numpy_module_dict, "bool_\0"), - datetime64: look_up_numpy_type(numpy_module_dict, "datetime64\0"), + array: look_up_numpy_type(numpy_module_dict, c"ndarray"), + float16: look_up_numpy_type(numpy_module_dict, c"half"), + float32: look_up_numpy_type(numpy_module_dict, c"float32"), + float64: look_up_numpy_type(numpy_module_dict, c"float64"), + int8: look_up_numpy_type(numpy_module_dict, c"int8"), + int16: look_up_numpy_type(numpy_module_dict, c"int16"), + int32: look_up_numpy_type(numpy_module_dict, c"int32"), + int64: look_up_numpy_type(numpy_module_dict, c"int64"), + uint16: look_up_numpy_type(numpy_module_dict, c"uint16"), + uint32: look_up_numpy_type(numpy_module_dict, c"uint32"), + uint64: look_up_numpy_type(numpy_module_dict, c"uint64"), + uint8: look_up_numpy_type(numpy_module_dict, c"uint8"), + bool_: look_up_numpy_type(numpy_module_dict, c"bool_"), + datetime64: look_up_numpy_type(numpy_module_dict, c"datetime64"), }); Py_XDECREF(numpy_module_dict); Py_XDECREF(numpy); @@ -269,10 +261,9 @@ pub fn load_numpy_types() -> Box>> { #[cfg_attr(feature = "optimize", optimize(size))] unsafe fn look_up_field_type() -> *mut PyTypeObject { unsafe { - let module = PyImport_ImportModule("dataclasses\0".as_ptr() as *const c_char); + let module = PyImport_ImportModule(c"dataclasses".as_ptr()); let module_dict = PyObject_GenericGetDict(module, null_mut()); - let ptr = PyMapping_GetItemString(module_dict, "_FIELD\0".as_ptr() as *const c_char) - as *mut PyTypeObject; + let ptr = PyMapping_GetItemString(module_dict, c"_FIELD".as_ptr()).cast::(); Py_DECREF(module_dict); Py_DECREF(module); ptr @@ -283,10 +274,9 @@ unsafe fn look_up_field_type() -> *mut PyTypeObject { #[cfg_attr(feature = "optimize", optimize(size))] unsafe fn look_up_enum_type() -> *mut PyTypeObject { unsafe { - let module = PyImport_ImportModule("enum\0".as_ptr() as *const c_char); + let module = PyImport_ImportModule(c"enum".as_ptr()); let module_dict = PyObject_GenericGetDict(module, null_mut()); - let ptr = PyMapping_GetItemString(module_dict, "EnumMeta\0".as_ptr() as *const c_char) - as *mut PyTypeObject; + let ptr = PyMapping_GetItemString(module_dict, c"EnumMeta".as_ptr()).cast::(); Py_DECREF(module_dict); Py_DECREF(module); ptr @@ -297,10 +287,9 @@ unsafe fn look_up_enum_type() -> *mut PyTypeObject { #[cfg_attr(feature = "optimize", optimize(size))] unsafe fn look_up_uuid_type() -> *mut PyTypeObject { unsafe { - let uuid_mod = PyImport_ImportModule("uuid\0".as_ptr() as *const c_char); + let uuid_mod = PyImport_ImportModule(c"uuid".as_ptr()); let uuid_mod_dict = PyObject_GenericGetDict(uuid_mod, null_mut()); - let uuid = - PyMapping_GetItemString(uuid_mod_dict, "NAMESPACE_DNS\0".as_ptr() as *const c_char); + let uuid = PyMapping_GetItemString(uuid_mod_dict, c"NAMESPACE_DNS".as_ptr()); let ptr = (*uuid).ob_type; Py_DECREF(uuid); Py_DECREF(uuid_mod_dict); @@ -353,15 +342,13 @@ unsafe fn look_up_time_type() -> *mut PyTypeObject { } } -#[cfg(Py_3_9)] #[cold] #[cfg_attr(feature = "optimize", optimize(size))] unsafe fn look_up_zoneinfo_type() -> *mut PyTypeObject { unsafe { - let module = PyImport_ImportModule("zoneinfo\0".as_ptr() as *const c_char); + let module = PyImport_ImportModule(c"zoneinfo".as_ptr()); let 
module_dict = PyObject_GenericGetDict(module, null_mut()); - let ptr = PyMapping_GetItemString(module_dict, "ZoneInfo\0".as_ptr() as *const c_char) - as *mut PyTypeObject; + let ptr = PyMapping_GetItemString(module_dict, c"ZoneInfo".as_ptr()).cast::(); Py_DECREF(module_dict); Py_DECREF(module); ptr diff --git a/src/util.rs b/src/util.rs index 1a0b630a..ccf425ce 100644 --- a/src/util.rs +++ b/src/util.rs @@ -47,7 +47,7 @@ macro_rules! is_subclass_by_flag { macro_rules! is_subclass_by_type { ($ob_type:expr, $type:ident) => { unsafe { - (*($ob_type as *mut pyo3_ffi::PyTypeObject)) + (*($ob_type.cast::())) .ob_base .ob_base .ob_type @@ -112,7 +112,7 @@ macro_rules! nonnull { macro_rules! str_from_slice { ($ptr:expr, $size:expr) => { - unsafe { std::str::from_utf8_unchecked(core::slice::from_raw_parts($ptr, $size as usize)) } + unsafe { core::str::from_utf8_unchecked(core::slice::from_raw_parts($ptr, $size as usize)) } }; } @@ -168,7 +168,6 @@ macro_rules! ffi { }; } -#[cfg(Py_3_9)] macro_rules! call_method { ($obj1:expr, $obj2:expr) => { unsafe { pyo3_ffi::PyObject_CallMethodNoArgs($obj1, $obj2) } @@ -178,29 +177,6 @@ macro_rules! call_method { }; } -#[cfg(not(Py_3_9))] -macro_rules! call_method { - ($obj1:expr, $obj2:expr) => { - unsafe { - pyo3_ffi::PyObject_CallMethodObjArgs( - $obj1, - $obj2, - core::ptr::null_mut() as *mut pyo3_ffi::PyObject, - ) - } - }; - ($obj1:expr, $obj2:expr, $obj3:expr) => { - unsafe { - pyo3_ffi::PyObject_CallMethodObjArgs( - $obj1, - $obj2, - $obj3, - core::ptr::null_mut() as *mut pyo3_ffi::PyObject, - ) - } - }; -} - macro_rules! str_hash { ($op:expr) => { unsafe { (*$op.cast::()).hash } @@ -278,6 +254,28 @@ macro_rules! pydict_next { }; } +macro_rules! pydict_setitem { + ($dict:expr, $pykey:expr, $pyval:expr) => { + debug_assert!(ffi!(Py_REFCNT($dict)) == 1); + debug_assert!(str_hash!($pykey) != -1); + #[cfg(not(Py_3_13))] + unsafe { + let _ = pyo3_ffi::_PyDict_SetItem_KnownHash($dict, $pykey, $pyval, str_hash!($pykey)); + } + #[cfg(Py_3_13)] + unsafe { + let _ = pyo3_ffi::_PyDict_SetItem_KnownHash_LockHeld( + $dict.cast::(), + $pykey, + $pyval, + str_hash!($pykey), + ); + } + reverse_pydict_incref!($pykey); + reverse_pydict_incref!($pyval); + }; +} + macro_rules! reserve_minimum { ($writer:expr) => { $writer.reserve(64); @@ -337,3 +335,17 @@ macro_rules! 
unreachable_unchecked { unsafe { core::hint::unreachable_unchecked() } }; } + +#[inline(always)] +#[allow(clippy::cast_possible_wrap)] +pub fn usize_to_isize(val: usize) -> isize { + debug_assert!(val < (isize::MAX as usize)); + val as isize +} + +#[inline(always)] +#[allow(clippy::cast_sign_loss)] +pub fn isize_to_usize(val: isize) -> usize { + debug_assert!(val >= 0); + val as usize +} diff --git a/test/requirements.txt b/test/requirements.txt index 97bee4ca..d5916c84 100644 --- a/test/requirements.txt +++ b/test/requirements.txt @@ -1,8 +1,7 @@ -arrow faker numpy;(platform_machine=="x86_64" or (platform_machine=="aarch64" and sys_platform == "linux")) and python_version<"3.13" -pendulum;sys_platform=="linux" and platform_machine=="x86_64" and python_version<"3.12" -time-machine < 2.15;sys_platform=="linux" and platform_machine=="x86_64" and python_version<"3.12" +pendulum;sys_platform=="linux" and platform_machine=="x86_64" and python_version<"3.14" psutil;(sys_platform=="linux" or sys_platform == "macos") and platform_machine=="x86_64" and python_version<"3.13" pytest +python-dateutil >=2,<3;python_version<"3.14" pytz diff --git a/test/test_api.py b/test/test_api.py index f4078b5b..58e5d4fa 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -79,7 +79,7 @@ def test_loads_recursion_valid_limit_mixed(self): loads() recursion limit at limit mixed """ n = LOADS_RECURSION_LIMIT - value = b"[" b'{"key":' * n + b'{"key":true}' + b"}" * n + b"]" + value = b"".join((b"[", b'{"key":' * n, b'{"key":true}' + b"}" * n, b"]")) pytest.raises(orjson.JSONDecodeError, orjson.loads, value) def test_loads_recursion_valid_excessive_array(self): @@ -111,7 +111,7 @@ def test_loads_recursion_valid_limit_mixed_pretty(self): loads() recursion limit at limit mixed pretty """ n = LOADS_RECURSION_LIMIT - value = b"[\n " b'{"key":' * n + b'{"key":true}' + b"}" * n + b"]" + value = b'[\n {"key":' * n + b'{"key":true}' + b"}" * n + b"]" pytest.raises(orjson.JSONDecodeError, orjson.loads, value) def test_loads_recursion_valid_excessive_array_pretty(self): diff --git a/test/test_escape.py b/test/test_escape.py new file mode 100644 index 00000000..30f1c153 --- /dev/null +++ b/test/test_escape.py @@ -0,0 +1,126 @@ +# SPDX-License-Identifier: (Apache-2.0 OR MIT) + +import orjson + + +def test_issue565(): + assert ( + orjson.dumps("\n\r\u000b\f\u001c\u001d\u001e") + == b'"\\n\\r\\u000b\\f\\u001c\\u001d\\u001e"' + ) + + +def test_0x00(): + assert orjson.dumps("\u0000") == b'"\\u0000"' + + +def test_0x01(): + assert orjson.dumps("\u0001") == b'"\\u0001"' + + +def test_0x02(): + assert orjson.dumps("\u0002") == b'"\\u0002"' + + +def test_0x03(): + assert orjson.dumps("\u0003") == b'"\\u0003"' + + +def test_0x04(): + assert orjson.dumps("\u0004") == b'"\\u0004"' + + +def test_0x05(): + assert orjson.dumps("\u0005") == b'"\\u0005"' + + +def test_0x06(): + assert orjson.dumps("\u0006") == b'"\\u0006"' + + +def test_0x07(): + assert orjson.dumps("\u0007") == b'"\\u0007"' + + +def test_0x08(): + assert orjson.dumps("\u0008") == b'"\\b"' + + +def test_0x09(): + assert orjson.dumps("\u0009") == b'"\\t"' + + +def test_0x0a(): + assert orjson.dumps("\u000a") == b'"\\n"' + + +def test_0x0b(): + assert orjson.dumps("\u000b") == b'"\\u000b"' + + +def test_0x0c(): + assert orjson.dumps("\u000c") == b'"\\f"' + + +def test_0x0d(): + assert orjson.dumps("\u000d") == b'"\\r"' + + +def test_0x0e(): + assert orjson.dumps("\u000e") == b'"\\u000e"' + + +def test_0x0f(): + assert orjson.dumps("\u000f") == b'"\\u000f"' + + +def test_0x10(): + 
assert orjson.dumps("\u0010") == b'"\\u0010"' + + +def test_0x11(): + assert orjson.dumps("\u0011") == b'"\\u0011"' + + +def test_0x12(): + assert orjson.dumps("\u0012") == b'"\\u0012"' + + +def test_0x13(): + assert orjson.dumps("\u0013") == b'"\\u0013"' + + +def test_0x14(): + assert orjson.dumps("\u0014") == b'"\\u0014"' + + +def test_0x15(): + assert orjson.dumps("\u0015") == b'"\\u0015"' + + +def test_0x16(): + assert orjson.dumps("\u0016") == b'"\\u0016"' + + +def test_0x17(): + assert orjson.dumps("\u0017") == b'"\\u0017"' + + +def test_0x18(): + assert orjson.dumps("\u0018") == b'"\\u0018"' + + +def test_0x19(): + assert orjson.dumps("\u0019") == b'"\\u0019"' + + +def test_0x1a(): + assert orjson.dumps("\u001a") == b'"\\u001a"' + + +def test_backslash(): + assert orjson.dumps("\\") == b'"\\\\"' + + +def test_quote(): + assert orjson.dumps('"') == b'"\\""'
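The escape tests above pin down the output of the reworked `QUOTE_TAB` in `src/serialize/writer/str/escape.rs`: each entry is now a flat `[u8; 8]` row whose leading bytes hold the escape sequence and whose final byte holds the sequence length, which the `write_escape!` variants use to advance the destination pointer after a fixed 8-byte store. Below is a minimal standalone sketch of that layout, not the crate's code: `TAB` is a hypothetical two-entry stand-in for the 96-entry table, and the safe `write_escape` helper copies only the meaningful prefix instead of storing a full 8-byte word.

```rust
// Sketch of the [u8; 8] escape-table layout: bytes 0..=6 hold the
// (padded) escape text, byte 7 holds its length in bytes.
const TAB: [[u8; 8]; 2] = [
    [b'\\', b'u', b'0', b'0', b'0', b'0', 0, 6], // 0x00 -> \u0000
    [b'\\', b'n', b'0', b'0', b'0', b'0', 0, 2], // 0x0a -> \n
];

// Safe stand-in for the write_escape! macro: copy only the first `len` bytes.
fn write_escape(entry: &[u8; 8], out: &mut Vec<u8>) {
    let len = entry[7] as usize;
    out.extend_from_slice(&entry[..len]);
}

fn main() {
    let mut out = Vec::new();
    write_escape(&TAB[0], &mut out); // NUL escapes to six bytes
    write_escape(&TAB[1], &mut out); // newline escapes to two bytes
    assert_eq!(out, b"\\u0000\\n");
    println!("{}", String::from_utf8(out).unwrap());
}
```

Packing the length into the last byte is what lets the real macros do a branch-free fixed-size copy and then bump the pointer by `escape[7]` (or by the top byte of the `u64`, in the `inline_int` variant); any padding written past the escape is overwritten by the bytes that follow.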