Closed
Description
What version of regex are you using?
1.9
Describe the bug at a high level.
The upgrade from 1.8.4 to 1.9 significantly increased runtime memory usage. (375MB vs 175MB)
Heap analysis suggests that this is due to regex_automata::nfa::thompson::backtrack::Cache::new::hc3a7922de221986c (900 allocations at 256KB each, which is a little more than the 200MB difference). Here's the full stack trace for those allocations:
alloc::alloc::Global::alloc_impl::hc0d2fbcc9e259c41
_$LT$alloc..alloc..Global$u20$as$u20$core..alloc..Allocator$GT$::allocate::hff80cfdf399332ed
alloc::raw_vec::finish_grow::h1d4f4e7469250062
alloc::raw_vec::RawVec$LT$T$C$A$GT$::grow_amortized::hb0b597f5657dcfe2
alloc::raw_vec::RawVec$LT$T$C$A$GT$::reserve::do_reserve_and_handle::hafd8f04576418cba
alloc::vec::Vec$LT$T$C$A$GT$::reserve::h9ddd934bc4485398
alloc::vec::Vec$LT$T$C$A$GT$::extend_with::h866264405059bb08
alloc::vec::Vec$LT$T$C$A$GT$::resize::h22b6e20adc554f2b
regex_automata::nfa::thompson::backtrack::Visited::reset::hcd3ecf9f7bc5e143
regex_automata::nfa::thompson::backtrack::Visited::new::h28c46b03d50c9004
regex_automata::nfa::thompson::backtrack::Cache::new::hc3a7922de221986c
regex_automata::nfa::thompson::backtrack::BoundedBacktracker::create_cache::heb005f34fc005666
regex_automata::meta::wrappers::BoundedBacktrackerCache::new::_$u7b$$u7b$closure$u7d$$u7d$::h66c74de55ee73b0e
core::option::Option$LT$T$GT$::map::hbfd3f56f2a53617b
regex_automata::meta::wrappers::BoundedBacktrackerCache::new::h6625b6730d32ff87
regex_automata::meta::wrappers::BoundedBacktracker::create_cache::h2c635eefad95c3ac
_$LT$regex_automata..meta..strategy..Core$u20$as$u20$regex_automata..meta..strategy..Strategy$GT$::create_cache::haf98298fa360833d
regex_automata::meta::regex::Builder::build_many_from_hir::_$u7b$$u7b$closure$u7d$$u7d$::ha3b32a0a124f1e6c
_$LT$alloc..boxed..Box$LT$F$C$A$GT$$u20$as$u20$core..ops..function..Fn$LT$Args$GT$$GT$::call::h2983e1b0fb6fda94
regex_automata::util::pool::inner::Pool$LT$T$C$F$GT$::get_slow::hd0a3e0958d9d9787
regex_automata::util::pool::inner::Pool$LT$T$C$F$GT$::get::hc70ab56136e5ccdb
regex_automata::util::pool::Pool$LT$T$C$F$GT$::get::h37bc82ca85a795cf
regex_automata::meta::regex::Regex::search_half::haad5eaf00688f9e3
regex::regex::string::Regex::is_match_at::hefc78212cd6a6e5e
regex::regex::string::Regex::is_match::h978417c482a3b499
_$LT$uaparser..parser..os..Matcher$u20$as$u20$uaparser..SubParser$GT$::try_parse::h3fac1011bb0e5792
_$LT$uaparser..parser..UserAgentParser$u20$as$u20$uaparser..Parser$GT$::parse_os::_$u7b$$u7b$closure$u7d$$u7d$::h034f173e256d694c
_$LT$core..slice..iter..Iter$LT$T$GT$$u20$as$u20$core..iter..traits..iterator..Iterator$GT$::find_map::h9db4b2cdc08ef062
_$LT$uaparser..parser..UserAgentParser$u20$as$u20$uaparser..Parser$GT$::parse_os::h102f7f7facbe3882
What are the steps to reproduce the behavior?
use std::{thread, vec};
use once_cell::sync::Lazy;
use rand::distributions::Alphanumeric;
use rand::{thread_rng, Rng};
use uaparser::{Parser, UserAgentParser};
pub static USER_AGENT_PARSER: Lazy<UserAgentParser> =
Lazy::new(|| UserAgentParser::from_bytes(include_bytes!("../regexes.yml")).expect("error constructing parser"));
/// Reproduction driver for the memory-usage report.
///
/// Spawns ten worker threads. Each worker generates 100,000 random
/// 30-character alphanumeric strings, runs every one through the shared
/// parser's `parse_os`, and prints how many were classified with the OS
/// family "Android". The main thread then waits for all workers to finish.
fn main() {
    let uap = &USER_AGENT_PARSER;

    // Collect the handles first so every worker is started before any join;
    // a lazy spawn-then-join chain would serialize the threads.
    let workers: Vec<_> = (0..10)
        .map(|_| {
            thread::spawn(move || {
                let android_count = (0..100_000)
                    .filter(|_| {
                        let candidate: String = thread_rng()
                            .sample_iter(&Alphanumeric)
                            .take(30)
                            .map(char::from)
                            .collect();
                        // Bind the parse result so it is dropped before
                        // `candidate`, which it may borrow from.
                        let os = uap.parse_os(&candidate);
                        os.family == "Android"
                    })
                    .count();
                println!("Android count: {}", android_count);
            })
        })
        .collect();

    for worker in workers {
        worker.join().unwrap();
    }
}
with dependencies:
[dependencies]
once_cell= "1.17.1"
uaparser = { version = "0.6.0", default-features = false }
rand = "0.8.5"
regex = "1.9"
and the regexes.yml file can be found here: https://gist.github.com/hamiltop/b010fb9e0189210796c526144b27bd99
What is the actual behavior?
On regex 1.9 that uses 375MB of memory on my machine.
What is the expected behavior?
On regex 1.8.4 that uses 175MB of memory on my machine.
Metadata
Metadata
Assignees
Labels
No labels