Skip to content
This repository was archived by the owner on Jan 7, 2022. It is now read-only.

Commit e9cd536

Browse files
committed
Use custom cachable fs.realpath implementation
In this use case, we don't care much about a lot of the stuff that fs.realpath can (and should!) do. The only thing that's relevant to reading a package tree is whether package folders are symbolic links, and if so, where they point. Additionally, we don't need to re-start the fs.lstat party every time we walk to a new directory. While it makes sense for fs.realpath to do this in the general case, it's not required when reading a package tree, and results in a geometric explosion of lstat syscalls. For example, if a project is in /Users/hyooman/projects/company/website, and it has 1000 dependencies in node_modules, then a whopping 6,000 lstat calls will be made just to repeatedly verify that /Users/hyooman/projects/company/website/node_modules has not moved! In this implementation, every realpath call is cached, as is every lstat. Additionally, process.cwd() is assumed to be "real enough", and added to the cache initially, which means almost never having to walk all the way up to the root directory. In the npm cli project, this drops the lstat count from 14885 to 3054 for a single call to read-package-tree on my system. Larger projects, or projects deeper in a folder tree, will have even larger reductions. This does not account, itself, for a particularly large speed-up, since lstat calls do tend to be fairly fast, and the repetitiveness means that there are a lot of hits in the file system's stat cache. But it does make read-package-tree 10-30% faster in common use cases.
1 parent 4eed760 commit e9cd536

File tree

5 files changed

+238
-28
lines changed

5 files changed

+238
-28
lines changed

package.json

+2-1
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@
3131
},
3232
"homepage": "https://github.com/npm/read-package-tree",
3333
"files": [
34-
"rpt.js"
34+
"rpt.js",
35+
"realpath.js"
3536
],
3637
"tap": {
3738
"100": true

realpath.js

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
// look up the realpath, but cache stats to minimize overhead
2+
// If the parent folder is in the realpath cache, then we just
3+
// lstat the child, since there's no need to do a full realpath
4+
// This is not a separate module, and is much simpler than Node's
5+
// built-in fs.realpath, because we only care about symbolic links,
6+
// so we can handle many fewer edge cases.
7+
8+
const fs = require('fs')
9+
const { promisify } = require('util')
10+
const readlink = promisify(fs.readlink)
11+
const lstat = promisify(fs.lstat)
12+
const { resolve, basename, dirname } = require('path')
13+
14+
const realpathCached = (path, rpcache, stcache, depth) => {
15+
// just a safety against extremely deep eloops
16+
/* istanbul ignore next */
17+
if (depth > 2000)
18+
throw eloop(path)
19+
20+
if (rpcache.has(path))
21+
return Promise.resolve(rpcache.get(path))
22+
23+
const dir = dirname(path)
24+
const base = basename(path)
25+
26+
if (base && rpcache.has(dir))
27+
return realpathChild(dir, base, rpcache, stcache, depth)
28+
29+
// if it's the root, then we know it's real
30+
if (!base) {
31+
rpcache.set(dir, dir)
32+
return Promise.resolve(dir)
33+
}
34+
35+
// the parent, what is that?
36+
// find out, and then come back.
37+
return realpathCached(dir, rpcache, stcache, depth + 1).then(() =>
38+
realpathCached(path, rpcache, stcache, depth + 1))
39+
}
40+
41+
const lstatCached = (path, stcache) => {
42+
if (stcache.has(path))
43+
return Promise.resolve(stcache.get(path))
44+
45+
const p = lstat(path).then(st => {
46+
stcache.set(path, st)
47+
return st
48+
})
49+
stcache.set(path, p)
50+
return p
51+
}
52+
53+
// This is a slight fib, as it doesn't actually occur during a stat syscall.
54+
// But file systems are giant piles of lies, so whatever.
55+
const eloop = path =>
56+
Object.assign(new Error(
57+
`ELOOP: too many symbolic links encountered, stat '${path}'`), {
58+
errno: -62,
59+
syscall: 'stat',
60+
code: 'ELOOP',
61+
path: path,
62+
})
63+
64+
const realpathChild = (dir, base, rpcache, stcache, depth) => {
65+
const realdir = rpcache.get(dir)
66+
// that unpossible
67+
/* istanbul ignore next */
68+
if (typeof realdir === 'undefined')
69+
throw new Error('in realpathChild without parent being in realpath cache')
70+
71+
const realish = resolve(realdir, base)
72+
return lstatCached(realish, stcache).then(st => {
73+
if (!st.isSymbolicLink()) {
74+
rpcache.set(resolve(dir, base), realish)
75+
return realish
76+
}
77+
78+
let res
79+
return readlink(realish).then(target => {
80+
const resolved = res = resolve(realdir, target)
81+
if (realish === resolved)
82+
throw eloop(realish)
83+
84+
return realpathCached(resolved, rpcache, stcache, depth + 1)
85+
}).then(real => {
86+
rpcache.set(resolve(dir, base), real)
87+
return real
88+
})
89+
})
90+
}
91+
92+
module.exports = realpathCached

rpt.js

+81-27
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,17 @@
11
const fs = require('fs')
22
const { promisify } = require('util')
3-
const realpath = promisify(fs.realpath)
4-
const { basename, dirname, join } = require('path')
3+
const { resolve, basename, dirname, join } = require('path')
54
const rpj = promisify(require('read-package-json'))
65
const readdir = promisify(require('readdir-scoped-modules'))
6+
const realpath = require('./realpath.js')
77

88
let ID = 0
99
class Node {
1010
constructor (pkg, logical, physical, er, cache) {
1111
// should be impossible.
12+
const cached = cache.get(physical)
1213
/* istanbul ignore next */
13-
if (cache.get(physical))
14+
if (cached && !cached.then)
1415
throw new Error('re-creating already instantiated node')
1516

1617
cache.set(physical, this)
@@ -34,37 +35,84 @@ class Node {
3435
class Link extends Node {
3536
constructor (pkg, logical, physical, realpath, er, cache) {
3637
super(pkg, logical, physical, er, cache)
38+
39+
// if the target has started, but not completed, then
40+
// a Promise will be in the cache to indicate this.
3741
const cachedTarget = cache.get(realpath)
42+
if (cachedTarget && cachedTarget.then)
43+
cachedTarget.then(node => this.target = node)
44+
3845
this.target = cachedTarget || new Node(pkg, logical, realpath, er, cache)
3946
this.realpath = realpath
4047
this.isLink = true
41-
this.children = this.target.children
4248
this.error = er
49+
// convenience method only
50+
/* istanbul ignore next */
51+
Object.defineProperty(this, 'children', {
52+
get () {
53+
return this.target.children
54+
},
55+
set (c) {
56+
this.target.children = c
57+
},
58+
enumerable: true
59+
})
4360
}
4461
}
4562

46-
const loadNode = (logical, physical, cache) => new Promise((res, rej) => {
47-
res(cache.get(physical) || realpath(physical)
48-
.then(real =>
49-
rpj(join(real, 'package.json'))
50-
.then(pkg => [real, pkg, null], er => [real, null, er])
51-
.then(([real, pkg, er]) =>
52-
physical === real ? new Node(pkg, logical, physical, er, cache)
53-
: new Link(pkg, logical, physical, real, er, cache)
54-
),
55-
// if the realpath fails, don't bother with the rest
56-
er => new Node(null, logical, physical, er, cache))
57-
)
58-
})
59-
60-
const loadChildren = (node, cache, filterWith) => {
63+
// this is the way it is to expose a timing issue which is difficult to
64+
// test otherwise. The creation of a Node may take slightly longer than
65+
// the creation of a Link that targets it. If the Node has _begun_ its
66+
// creation phase (and put a Promise in the cache) then the Link will
67+
// get a Promise as its cachedTarget instead of an actual Node object.
68+
// This is not a problem, because it gets resolved prior to returning
69+
// the tree or attempting to load children. However, it IS remarkably
70+
// difficult to get to happen in a test environment to verify reliably.
71+
// Hence this kludge.
72+
const newNode = (pkg, logical, physical, er, cache) =>
73+
process.env._TEST_RPT_SLOW_LINK_TARGET_ === '1'
74+
? new Promise(res => setTimeout(() =>
75+
res(new Node(pkg, logical, physical, er, cache)), 10))
76+
: new Node(pkg, logical, physical, er, cache)
77+
78+
const loadNode = (logical, physical, cache, rpcache, stcache) => {
79+
// cache temporarily holds a promise placeholder so we
80+
// don't try to create the same node multiple times.
81+
// this is very rare to encounter, given the aggressive
82+
// caching on fs.realpath and fs.lstat calls, but
83+
// it can happen in theory.
84+
const cached = cache.get(physical)
85+
/* istanbul ignore next */
86+
if (cached)
87+
return Promise.resolve(cached)
88+
89+
const p = realpath(physical, rpcache, stcache, 0).then(real =>
90+
rpj(join(real, 'package.json'))
91+
.then(pkg => [pkg, null], er => [null, er])
92+
.then(([pkg, er]) =>
93+
physical === real ? newNode(pkg, logical, physical, er, cache)
94+
: new Link(pkg, logical, physical, real, er, cache)
95+
),
96+
// if the realpath fails, don't bother with the rest
97+
er => new Node(null, logical, physical, er, cache))
98+
99+
cache.set(physical, p)
100+
return p
101+
}
102+
103+
const loadChildren = (node, cache, filterWith, rpcache, stcache) => {
104+
// if a Link target has started, but not completed, then
105+
// a Promise will be in the cache to indicate this.
106+
if (node.then)
107+
return node.then(node => loadChildren(node, cache, filterWith, rpcache, stcache))
108+
61109
const nm = join(node.path, 'node_modules')
62-
return realpath(nm)
110+
return realpath(nm, rpcache, stcache, 0)
63111
.then(rm => readdir(rm).then(kids => [rm, kids]))
64112
.then(([rm, kids]) => Promise.all(
65113
kids.filter(kid =>
66114
kid.charAt(0) !== '.' && (!filterWith || filterWith(node, kid)))
67-
.map(kid => loadNode(join(nm, kid), join(rm, kid), cache)))
115+
.map(kid => loadNode(join(nm, kid), join(rm, kid), cache, rpcache, stcache)))
68116
).then(kidNodes => {
69117
kidNodes.forEach(k => k.parent = node)
70118
node.children = kidNodes.sort((a, b) =>
@@ -77,19 +125,20 @@ const loadChildren = (node, cache, filterWith) => {
77125
.catch(() => node)
78126
}
79127

80-
const loadTree = (node, did, cache, filterWith) => {
128+
const loadTree = (node, did, cache, filterWith, rpcache, stcache) => {
81129
// impossible except in pathological ELOOP cases
82130
/* istanbul ignore next */
83131
if (did.has(node.realpath))
84132
return Promise.resolve(node)
85133

86134
did.add(node.realpath)
87135

88-
return loadChildren(node, cache, filterWith)
136+
// load children on the target, not the link
137+
return loadChildren(node.target || node, cache, filterWith, rpcache, stcache)
89138
.then(node => Promise.all(
90139
node.children
91140
.filter(kid => !did.has(kid.realpath))
92-
.map(kid => loadTree(kid, did, cache, filterWith))
141+
.map(kid => loadTree(kid, did, cache, filterWith, rpcache, stcache))
93142
)).then(() => node)
94143
}
95144

@@ -100,10 +149,15 @@ const rpt = (root, filterWith, cb) => {
100149
filterWith = null
101150
}
102151

152+
root = resolve(root)
103153
const cache = new Map()
104-
const p = realpath(root)
105-
.then(realRoot => loadNode(root, realRoot, cache))
106-
.then(node => loadTree(node, new Set(), cache, filterWith))
154+
// we can assume that the cwd is real enough
155+
const cwd = process.cwd()
156+
const rpcache = new Map([[ cwd, cwd ]])
157+
const stcache = new Map()
158+
const p = realpath(root, rpcache, stcache, 0)
159+
.then(realRoot => loadNode(root, realRoot, cache, rpcache, stcache))
160+
.then(node => loadTree(node, new Set(), cache, filterWith, rpcache, stcache))
107161

108162
if (typeof cb === 'function')
109163
p.then(tree => cb(null, tree), cb)

tap-snapshots/test-basic.js-TAP.test.js

+30
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,20 @@ [email protected] test/fixtures/linkedroot
4040
└── [email protected] test/fixtures/linkedroot/node_modules/foo
4141
`
4242

43+
exports[`test/basic.js TAP looking outside of cwd > must match snapshot 1`] = `
44+
[email protected] test/fixtures/root
45+
├─┬ @scope/[email protected] test/fixtures/root/node_modules/@scope/x
46+
│ └─┬ [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob
47+
│ ├── [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/graceful-fs
48+
│ ├── [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/inherits
49+
│ ├─┬ [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch
50+
│ │ ├── [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch/node_modules/lru-cache
51+
│ │ └── [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/minimatch/node_modules/sigmund
52+
│ └── [email protected] test/fixtures/root/node_modules/@scope/x/node_modules/glob/node_modules/once
53+
├── @scope/[email protected] test/fixtures/root/node_modules/@scope/y
54+
└── [email protected] test/fixtures/root/node_modules/foo
55+
`
56+
4357
exports[`test/basic.js TAP noname > noname tree 1`] = `
4458
test/fixtures/noname
4559
└── test/fixtures/noname/node_modules/foo
@@ -79,3 +93,19 @@ [email protected] test/fixtures/selflink
7993
│ └── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/once
8094
└── [email protected] test/fixtures/selflink (symlink)
8195
`
96+
97+
exports[`test/basic.js TAP shake out Link target timing issue > must match snapshot 1`] = `
98+
[email protected] test/fixtures/selflink
99+
├── @scope/[email protected] test/fixtures/selflink/node_modules/@scope/y
100+
├─┬ @scope/[email protected] test/fixtures/selflink/node_modules/@scope/z
101+
│ └── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob (symlink)
102+
└─┬ [email protected] test/fixtures/selflink/node_modules/foo
103+
├─┬ [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob
104+
│ ├── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/graceful-fs
105+
│ ├── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/inherits
106+
│ ├─┬ [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch
107+
│ │ ├── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch/node_modules/lru-cache
108+
│ │ └── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/minimatch/node_modules/sigmund
109+
│ └── [email protected] test/fixtures/selflink/node_modules/foo/node_modules/glob/node_modules/once
110+
└── [email protected] test/fixtures/selflink (symlink)
111+
`

test/basic.js

+33
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,22 @@ test('filterWith', t =>
8585
).then(d => t.matchSnapshot(archy(archyize(d)).trim()), 'only 1 level deep')
8686
)
8787

88+
test('looking outside of cwd', t => {
89+
const cwd = process.cwd()
90+
t.teardown(() => process.chdir(cwd))
91+
process.chdir('test/fixtures/selflink')
92+
return rpt('../root').then(d =>
93+
t.matchSnapshot(archy(archyize(d)).trim()))
94+
})
95+
96+
test('shake out Link target timing issue', t => {
97+
process.env._TEST_RPT_SLOW_LINK_TARGET_ = '1'
98+
const cwd = process.cwd()
99+
t.teardown(() => process.env._TEST_RPT_SLOW_LINK_TARGET_ = '')
100+
return rpt(path.resolve(fixtures, 'selflink')).then(d =>
101+
t.matchSnapshot(archy(archyize(d)).trim()))
102+
})
103+
88104
test('broken json', function (t) {
89105
rpt(path.resolve(fixtures, 'bad'), function (er, d) {
90106
t.ok(d.error, 'Got an error object')
@@ -152,6 +168,23 @@ function archyize (d, seen) {
152168
}
153169
}
154170

171+
test('realpath gutchecks', t => {
172+
const d = path.resolve(cwd, 'test/fixtures')
173+
const realpath = require('../realpath.js')
174+
const {realpathSync} = fs
175+
Object.keys(symlinks).map(link => t.test(link, t =>
176+
realpath(
177+
path.resolve(d, link),
178+
new Map(),
179+
new Map(),
180+
0
181+
).then(
182+
real => t.equal(real, realpathSync(path.resolve(d, link))),
183+
er => t.throws(()=> realpathSync(path.resolve(d, link)))
184+
)))
185+
t.end()
186+
})
187+
155188
test('cleanup', function (t) {
156189
cleanup()
157190
t.end()

0 commit comments

Comments
 (0)