Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Improve throughput of String.Split(char / char[], ...) #15322

Merged
merged 2 commits into from
Dec 1, 2017

Conversation

stephentoub
Copy link
Member

Also reduces the amount of unsafe code used.

Benchmark Results:

Benchmark Mean Before Mean After Improvement
Split_NoSeps_NoSplits 94.65 69.33 26.75%
Split_1Sep_NoSplits 47.11 34.61 26.53%
Split_2Seps_NoSplits 61.28 35.21 42.54%
Split_3Seps_NoSplits 91.39 43.14 52.80%
Split_ManySeps_NoSplits 118.73 53.75 54.73%
Split_NoSeps_Splits 123.89 118.58 4.29%
Split_1Sep_Splits 92.72 80.92 12.73%
Split_2Seps_Splits 100.64 88.27 12.29%
Split_3Seps_Splits 131.76 104.92 20.37%
Split_ManySeps_Splits 182.5 130.35 28.58%
Split_NoSeps_ManySplits 275.11 281.97 -2.49%
Split_1Sep_ManySplits 251.72 242.38 3.71%
Split_2Seps_ManySplits 255.53 244.98 4.13%
Split_3Seps_ManySplits 271.73 250.99 7.63%
Split_ManySeps_ManySplits 314.8 308.13 2.12%

Benchmark

using System;
using BenchmarkDotNet.Attributes.Jobs;
using BenchmarkDotNet.Attributes;
using BenchmarkDotNet.Running;

/// <summary>
/// Micro-benchmarks for <c>string.Split</c>, covering a grid of separator
/// counts (none, one, two, three, seven) against how often the input
/// actually contains a separator (never, twice, or at every other character).
/// </summary>
/// <remarks>
/// Every benchmark invokes <c>Split</c> <see cref="Iters"/> times per call, so a
/// reported mean covers <see cref="Iters"/> splits rather than a single one.
/// All inputs are 27 characters long so the rows are comparable to each other.
/// None of the methods need an <c>unsafe</c> context: they contain no pointer
/// code, so the class compiles without enabling unsafe blocks.
/// </remarks>
[InProcess]
public class Program
{
    /// <summary>Entry point: hands this class to the BenchmarkDotNet runner.</summary>
    public static void Main() => BenchmarkRunner.Run<Program>();

    /// <summary>Number of <c>Split</c> calls performed inside each benchmark invocation.</summary>
    private const int Iters = 1_000;

    // Separator sets are allocated once so the array allocation is not part of the measurement.
    private static readonly char[] s_commaSpace = new char[] { ',', ' ' };
    private static readonly char[] s_commaSpaceColon = new char[] { ',', ' ', ':' };
    private static readonly char[] s_letters = new char[] { 'a','b','c','d','e','f','g' };

    // ---------------------------------------------------------------------
    // Input contains no separator: the result is a single-element array.
    // ---------------------------------------------------------------------

    /// <summary>Parameterless <c>Split()</c> on an input with no white space.</summary>
    [Benchmark]
    public void Split_NoSeps_NoSplits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "123456789123456789123456789".Split();
        }
    }

    /// <summary>Single <c>char</c> separator that does not occur in the input.</summary>
    [Benchmark]
    public void Split_1Sep_NoSplits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "123456789123456789123456789".Split(',');
        }
    }

    /// <summary>Two separators, neither of which occurs in the input.</summary>
    [Benchmark]
    public void Split_2Seps_NoSplits()
    {
        // Copy to a local so the loop body reads a local instead of the static field.
        char[] seps = s_commaSpace;
        for (int i = 0; i < Iters; i++)
        {
            "123456789123456789123456789".Split(seps);
        }
    }

    /// <summary>Three separators, none of which occurs in the input.</summary>
    [Benchmark]
    public void Split_3Seps_NoSplits()
    {
        char[] seps = s_commaSpaceColon;
        for (int i = 0; i < Iters; i++)
        {
            "123456789123456789123456789".Split(seps);
        }
    }

    /// <summary>Seven separators, none of which occurs in the input.</summary>
    [Benchmark]
    public void Split_ManySeps_NoSplits()
    {
        char[] seps = s_letters;
        for (int i = 0; i < Iters; i++)
        {
            "123456789123456789123456789".Split(seps);
        }
    }

    // ---------------------------------------------------------------------
    // Input contains a few separators.
    // ---------------------------------------------------------------------

    /// <summary>Parameterless <c>Split()</c> on an input containing two spaces.</summary>
    [Benchmark]
    public void Split_NoSeps_Splits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "12345678 12345678 123456789".Split();
        }
    }

    /// <summary>Single <c>char</c> separator that occurs twice in the input.</summary>
    [Benchmark]
    public void Split_1Sep_Splits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "12345678,12345678,123456789".Split(',');
        }
    }

    /// <summary>Two separators, each occurring once in the input.</summary>
    [Benchmark]
    public void Split_2Seps_Splits()
    {
        char[] seps = s_commaSpace;
        for (int i = 0; i < Iters; i++)
        {
            "12345678,12345678 123456789".Split(seps);
        }
    }

    /// <summary>Three separators, each occurring once in the input.</summary>
    [Benchmark]
    public void Split_3Seps_Splits()
    {
        char[] seps = s_commaSpaceColon;
        for (int i = 0; i < Iters; i++)
        {
            "1234567,91234567 91234567:9".Split(seps);
        }
    }

    /// <summary>Seven separators, three of which occur once each in the input.</summary>
    [Benchmark]
    public void Split_ManySeps_Splits()
    {
        char[] seps = s_letters;
        for (int i = 0; i < Iters; i++)
        {
            "1234567a91234567d91234567g9".Split(seps);
        }
    }

    // ---------------------------------------------------------------------
    // Input alternates between a data character and a separator.
    // ---------------------------------------------------------------------

    /// <summary>Parameterless <c>Split()</c> on an input with a space at every other position.</summary>
    [Benchmark]
    public void Split_NoSeps_ManySplits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "1 3 5 7 9 2 4 6 8 1 3 5 7 9".Split();
        }
    }

    /// <summary>Single <c>char</c> separator occurring at every other position.</summary>
    [Benchmark]
    public void Split_1Sep_ManySplits()
    {
        for (int i = 0; i < Iters; i++)
        {
            "1,3,5,7,9,2,4,6,8,1,3,5,7,9".Split(',');
        }
    }

    /// <summary>Two separators alternating at every other position.</summary>
    [Benchmark]
    public void Split_2Seps_ManySplits()
    {
        char[] seps = s_commaSpace;
        for (int i = 0; i < Iters; i++)
        {
            "1,3 5,7 9,2 4,6 8,1 3,5 7,9".Split(seps);
        }
    }

    /// <summary>Three separators cycling at every other position.</summary>
    [Benchmark]
    public void Split_3Seps_ManySplits()
    {
        char[] seps = s_commaSpaceColon;
        for (int i = 0; i < Iters; i++)
        {
            "1,3 5:7,9 2:4,6 8:1,3 5:7,9".Split(seps);
        }
    }

    /// <summary>Seven separators cycling at every other position.</summary>
    [Benchmark]
    public void Split_ManySeps_ManySplits()
    {
        char[] seps = s_letters;
        for (int i = 0; i < Iters; i++)
        {
            "1a3b5c7d9e2f4g6a8b1c3d5e7f9".Split(seps);
        }
    }
}

cc: @jkotas, @bbowyersmyth, @joperezr, @AlexGhiondea

Also reduces the amount of unsafe code used.
@@ -1091,12 +1091,12 @@ public String Replace(String oldValue, String newValue)

public unsafe String[] Split(char separator, StringSplitOptions options = StringSplitOptions.None)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

unsafe can now be removed here and below, right?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yup

@jkotas
Copy link
Member

jkotas commented Dec 1, 2017

@dotnet-bot test OSX10.12 x64 Checked Innerloop Build and Test please

@stephentoub stephentoub merged commit 1a628dc into dotnet:master Dec 1, 2017
@stephentoub stephentoub deleted the stringsplit_perf branch December 1, 2017 23:27
jashook pushed a commit to jashook/coreclr that referenced this pull request Dec 12, 2017
* Improve throughput of String.Split(char / char[], ...)

Also reduces the amount of unsafe code used.

* Address PR feedback
Sign up for free to subscribe to this conversation on GitHub. Already have an account? Sign in.
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants