Skip to content

Increased runtime memory usage in 1.9 #1027

Closed
@hamiltop

Description

@hamiltop

What version of regex are you using?

1.9

Describe the bug at a high level.

The upgrade from 1.8.4 to 1.9 significantly increased runtime memory usage. (375MB vs 175MB)

Heap analysis suggests that this is due to regex_automata::nfa::thompson::backtrack::Cache::new::hc3a7922de221986c. (900 allocations at 256KB each, which is a little more than the 200MB difference). Here's the full stack trace for those allocations:

alloc::alloc::Global::alloc_impl::hc0d2fbcc9e259c41	
_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$::allocate::hff80cfdf399332ed	
alloc::raw_vec::finish_grow::h1d4f4e7469250062	
alloc::raw_vec::RawVec$LT$T$C$A$GT$::grow_amortized::hb0b597f5657dcfe2	
alloc::raw_vec::RawVec$LT$T$C$A$GT$::reserve::do_reserve_and_handle::hafd8f04576418cba	
alloc::vec::Vec$LT$T$C$A$GT$::reserve::h9ddd934bc4485398	
alloc::vec::Vec$LT$T$C$A$GT$::extend_with::h866264405059bb08	
alloc::vec::Vec$LT$T$C$A$GT$::resize::h22b6e20adc554f2b	
regex_automata::nfa::thompson::backtrack::Visited::reset::hcd3ecf9f7bc5e143	
regex_automata::nfa::thompson::backtrack::Visited::new::h28c46b03d50c9004	
regex_automata::nfa::thompson::backtrack::Cache::new::hc3a7922de221986c	
regex_automata::nfa::thompson::backtrack::BoundedBacktracker::create_cache::heb005f34fc005666	
regex_automata::meta::wrappers::BoundedBacktrackerCache::new::_$u7b$$u7b$closure$u7d$$u7d$::h66c74de55ee73b0e	
core::option::Option$LT$T$GT$::map::hbfd3f56f2a53617b	
regex_automata::meta::wrappers::BoundedBacktrackerCache::new::h6625b6730d32ff87	
regex_automata::meta::wrappers::BoundedBacktracker::create_cache::h2c635eefad95c3ac	
_$LT$regex_automata..meta..strategy..Core$u20$as$u20$regex_automata..meta..strategy..Strategy$GT$::create_cache::haf98298fa360833d	
regex_automata::meta::regex::Builder::build_many_from_hir::_$u7b$$u7b$closure$u7d$$u7d$::ha3b32a0a124f1e6c	
_$LT$alloc..boxed..Box$LT$F$C$A$GT$$u20$as$u20$core..ops..function..Fn$LT$Args$GT$$GT$::call::h2983e1b0fb6fda94	
regex_automata::util::pool::inner::Pool$LT$T$C$F$GT$::get_slow::hd0a3e0958d9d9787	
regex_automata::util::pool::inner::Pool$LT$T$C$F$GT$::get::hc70ab56136e5ccdb	
regex_automata::util::pool::Pool$LT$T$C$F$GT$::get::h37bc82ca85a795cf	
regex_automata::meta::regex::Regex::search_half::haad5eaf00688f9e3	
regex::regex::string::Regex::is_match_at::hefc78212cd6a6e5e	
regex::regex::string::Regex::is_match::h978417c482a3b499	
_$LT$uaparser..parser..os..Matcher$u20$as$u20$uaparser..SubParser$GT$::try_parse::h3fac1011bb0e5792	
_$LT$uaparser..parser..UserAgentParser$u20$as$u20$uaparser..Parser$GT$::parse_os::_$u7b$$u7b$closure$u7d$$u7d$::h034f173e256d694c	
_$LT$core..slice..iter..Iter$LT$T$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$::find_map::h9db4b2cdc08ef062	
_$LT$uaparser..parser..UserAgentParser$u20$as$u20$uaparser..Parser$GT$::parse_os::h102f7f7facbe3882	

What are the steps to reproduce the behavior?

use std::{thread, vec};

use once_cell::sync::Lazy;
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use uaparser::{Parser, UserAgentParser};

pub static USER_AGENT_PARSER: Lazy<UserAgentParser> =
    Lazy::new(|| UserAgentParser::from_bytes(include_bytes!("../regexes.yml")).expect("error constructing parser"));

/// Reproduction driver: spawns 10 threads that each run `parse_os` on 100,000
/// random 30-character alphanumeric strings, print how many were classified as
/// "Android", and are then joined by the main thread.
fn main() {
    /// Builds one random 30-character alphanumeric input string.
    fn random_input() -> String {
        thread_rng()
            .sample_iter(&Alphanumeric)
            .take(30)
            .map(char::from)
            .collect()
    }

    let uap = &USER_AGENT_PARSER;
    let mut workers = vec![];
    for _ in 0..10 {
        // `uap` is a shared reference to the static parser, so every thread
        // goes through the same parser instance.
        let worker = thread::spawn(move || {
            let android_count = (0..100_000)
                .filter(|_| {
                    let candidate = random_input();
                    // Bind the result to a local so it is dropped before `candidate`.
                    let os = uap.parse_os(&candidate);
                    os.family == "Android"
                })
                .count();
            println!("Android count: {}", android_count);
        });
        workers.push(worker);
    }

    // Wait for every worker; a panic inside a worker is re-raised here.
    workers
        .into_iter()
        .for_each(|worker| worker.join().unwrap());
}

with dependencies:

[dependencies]
once_cell= "1.17.1"
uaparser = { version = "0.6.0", default-features = false }
rand = "0.8.5"
regex = "1.9"

and the regexes.yml file can be found here: https://gist.github.com/hamiltop/b010fb9e0189210796c526144b27bd99

What is the actual behavior?

On regex 1.9 that uses 375MB of memory on my machine.

What is the expected behavior?

On regex 1.8.4 that uses 175MB of memory on my machine.

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions