Node.js Streams 与背压机制：处理大文件的正确姿势

前言#

在 Node.js 中处理大文件是一个经典场景——读取一个 10GB 的日志文件进行分析、将数据库导出的 CSV 转换格式、实时处理视频流……如果你用 fs.readFile() 一次性读入内存，要么 OOM 崩溃，要么严重阻塞事件循环。

Streams（流） 是 Node.js 处理大量数据的核心抽象。它将数据分成小块（chunk）逐步处理，内存占用恒定且可控。而背压（Backpressure） 是流系统中最重要也最容易被忽略的机制——它确保生产者不会淹没消费者。

本文将深入讲解 Node.js 四种流类型、pipeline API、背压原理，并通过实战案例展示正确处理大文件的姿势。

流的四种类型#

Node.js 提供四种基本流类型：

类型	描述	示例
Readable	可读流，数据源	`fs.createReadStream`, `http.IncomingMessage`
Writable	可写流，数据目标	`fs.createWriteStream`, `http.ServerResponse`
Duplex	双工流，可读可写	`net.Socket`, `zlib` 流
Transform	转换流，读入-处理-写出	`zlib.createGzip`, `crypto.createCipheriv`

Readable Stream（可读流）#

1
import { createReadStream } from 'fs';
2
import { Readable } from 'stream';
3

4
// 从文件创建可读流
5
const fileStream = createReadStream('/var/log/app.log', {
6
  encoding: 'utf-8',
7
  highWaterMark: 64 * 1024 // 每次读取 64KB
8
});
9

10
// 事件模式（flowing mode）
11
fileStream.on('data', (chunk) => {
12
  console.log(`Received ${chunk.length} bytes`);
13
});
14

15
fileStream.on('end', () => {
16
  console.log('File reading complete');
17
});
18

19
fileStream.on('error', (err) => {
20
  console.error('Read error:', err);
21
});

两种消费模式#

可读流有两种模式：flowing（流动） 和 paused（暂停）。

1
// Paused mode（暂停模式）— 手动拉取数据
2
fileStream.on('readable', () => {
3
  let chunk;
4
  while ((chunk = fileStream.read()) !== null) {
5
    console.log(`Read ${chunk.length} bytes`);
6
  }
7
});
8

9
// Flowing mode（流动模式）— 数据自动推送
10
fileStream.on('data', (chunk) => {
11
  // 一旦注册 data 事件，流自动进入 flowing mode
12
  process.stdout.write(chunk);
13
});
14

15
// 手动控制流动
16
fileStream.pause();  // 暂停
17
fileStream.resume(); // 恢复

自定义 Readable#

1
class CounterStream extends Readable {
2
  #current;
3
  #max;
4

5
  constructor(max, options = {}) {
6
    super(options);
7
    this.#current = 1;
8
    this.#max = max;
9
  }
10

11
  _read(size) {
12
    if (this.#current > this.#max) {
13
      this.push(null); // null 表示流结束
14
      return;
15
    }
16

17
    const str = String(this.#current++) + '\n';
18
    this.push(str);
19
  }
20
}
21

22
const counter = new CounterStream(100);
23
counter.pipe(process.stdout);
24
// 输出 1 到 100

从异步数据源创建 Readable#

1
import { Readable } from 'stream';
2

3
// 从异步生成器创建
4
/**
 * Lazily walks a paginated JSON endpoint, yielding one NDJSON line per page.
 *
 * Pages are requested as `?page=1`, `?page=2`, ... until the endpoint returns
 * an empty list. The empty terminating page is not emitted.
 *
 * @param {string} url - Absolute endpoint URL; existing query parameters are preserved.
 * @yields {string} The JSON-serialised page followed by a newline.
 * @throws {Error} When a page responds with a non-2xx status.
 */
async function* fetchAllPages(url) {
  for (let page = 1; ; page++) {
    // Build the query through URL so an endpoint that already has parameters stays valid.
    const pageUrl = new URL(url);
    pageUrl.searchParams.set('page', String(page));

    const res = await fetch(pageUrl);
    if (!res.ok) {
      throw new Error(`Request for ${pageUrl} failed with HTTP ${res.status}`);
    }

    const data = await res.json();
    const size = data?.length;
    if (size === 0) {
      return; // empty page marks the end; nothing to emit
    }

    yield `${JSON.stringify(data)}\n`;

    if (!(size > 0)) {
      return; // payload without a length (not a list): emit it once, then stop
    }
  }
}
16

17
const apiStream = Readable.from(fetchAllPages('https://api.example.com/items'));

Writable Stream（可写流）#

1
import { createWriteStream } from 'fs';
2
import { Writable } from 'stream';
3

4
// 文件可写流
5
const output = createWriteStream('./output.txt');
6

7
output.write('Hello ');
8
output.write('World\n');
9
output.end('Done!'); // end 写入最后的数据并关闭流
10

11
output.on('finish', () => {
12
  console.log('All data written');
13
});

自定义 Writable#

1
/**
 * Writable sink that prints every non-empty input line to stdout with an
 * ISO timestamp prefix and reports the total line count when the stream ends.
 *
 * Lines are reassembled across chunk boundaries: text after the last newline
 * of a chunk is held back until the rest of the line (or end of stream) arrives.
 */
class LogWriter extends Writable {
  #lineCount = 0;
  #partial = '';
  // Streaming decoder keeps multi-byte UTF-8 characters intact across chunks.
  #decoder = new TextDecoder('utf-8');

  _write(chunk, encoding, callback) {
    const text = typeof chunk === 'string'
      ? chunk
      : this.#decoder.decode(chunk, { stream: true });

    const lines = (this.#partial + text).split('\n');
    this.#partial = lines.pop(); // last piece has no newline yet
    this.#print(lines);

    // The callback must always be invoked to mark this chunk as handled.
    // On failure, pass an Error instead: callback(new Error('...'))
    callback();
  }

  _final(callback) {
    // Flush whatever is left: an unterminated last line and any pending bytes.
    const rest = this.#partial + this.#decoder.decode();
    this.#partial = '';
    this.#print([rest]);

    console.log(`\nTotal lines processed: ${this.#lineCount}`);
    callback();
  }

  // Prints each non-empty line with a timestamp and counts it.
  #print(lines) {
    for (const line of lines) {
      if (!line) continue;
      this.#lineCount += 1;
      const timestamp = new Date().toISOString();
      process.stdout.write(`[${timestamp}] ${line}\n`);
    }
  }
}
23

24
const logger = new LogWriter();
25
logger.write('First message\n');
26
logger.write('Second message\n');
27
logger.end('Final message\n');

Transform Stream（转换流）#

Transform 是最实用的流类型——它接收输入数据、处理后输出新的数据。

1
import { Transform } from 'stream';
2

3
// CSV 转 JSON 转换流
4
/**
 * Transform that turns CSV bytes into one plain object per data row.
 *
 * The first non-blank line supplies the column names; every later non-blank
 * line becomes `{ [header]: value }` with values trimmed. Parsing is a plain
 * split on ',' — quoted fields containing commas are not supported.
 */
class CSVToJSON extends Transform {
  #headers = null;
  #buffer = '';
  // Streaming decoder: a UTF-8 character split across two chunks is decoded once complete.
  #decoder = new TextDecoder('utf-8');

  constructor(options = {}) {
    super({ ...options, readableObjectMode: true });
  }

  _transform(chunk, encoding, callback) {
    this.#buffer += typeof chunk === 'string'
      ? chunk
      : this.#decoder.decode(chunk, { stream: true });

    const lines = this.#buffer.split('\n');
    this.#buffer = lines.pop(); // keep the incomplete trailing line for the next chunk

    for (const line of lines) {
      this.#consume(line);
    }
    callback();
  }

  _flush(callback) {
    // Handle a final row that was not terminated by a newline.
    const tail = this.#buffer + this.#decoder.decode();
    this.#buffer = '';
    this.#consume(tail);
    callback();
  }

  // Interprets one complete line as either the header row or a data row.
  #consume(line) {
    if (!line.trim()) return;

    const values = line.split(',').map((v) => v.trim());
    if (!this.#headers) {
      this.#headers = values;
      return;
    }

    this.push(Object.fromEntries(this.#headers.map((header, i) => [header, values[i]])));
  }
}
51

52
// 使用
53
import { createReadStream } from 'fs';
54

55
const csvStream = createReadStream('./data.csv');
56
const parser = new CSVToJSON();
57

58
csvStream.pipe(parser).on('data', (record) => {
59
  console.log(record);
60
  // { name: 'Alice', age: '30', email: 'alice@test.com' }
61
});

实用 Transform 示例#

1
// 行分割转换流
2
/**
 * Transform that re-chunks a byte stream into individual lines.
 *
 * Emits each non-empty line (without its '\n') as a string in object mode.
 * A trailing line without a newline is emitted when the input ends.
 */
class LineSplitter extends Transform {
  #buffer = '';
  // Streaming decoder so a multi-byte character cut by a chunk boundary is not corrupted.
  #decoder = new TextDecoder('utf-8');

  constructor() {
    super({ readableObjectMode: true });
  }

  _transform(chunk, encoding, callback) {
    this.#buffer += this.#decoder.decode(chunk, { stream: true });
    const lines = this.#buffer.split('\n');
    this.#buffer = lines.pop(); // unfinished line waits for more input

    for (const line of lines) {
      if (line) this.push(line);
    }
    callback();
  }

  _flush(callback) {
    const tail = this.#buffer + this.#decoder.decode();
    this.#buffer = '';
    if (tail) this.push(tail);
    callback();
  }
}
25

26
// 进度报告转换流
27
/**
 * Pass-through Transform that reports progress on stderr.
 *
 * A line is written each time the completed percentage has advanced by at
 * least 5 points since the previous report. Data is forwarded unchanged.
 *
 * @param {number} total - Expected number of bytes. When it is not a positive
 *   number no percentage is reported (only the final completion line).
 */
class ProgressReporter extends Transform {
  #processed = 0;
  #total;
  #lastReport = 0;

  constructor(total) {
    super();
    this.#total = total;
  }

  _transform(chunk, encoding, callback) {
    this.#processed += chunk.length;

    // Guard against 0/NaN totals, which would otherwise print "NaN%" or "Infinity%".
    if (this.#total > 0) {
      // Clamp: the source may deliver more bytes than the declared total.
      const percent = Math.min(100, Math.round((this.#processed / this.#total) * 100));
      if (percent - this.#lastReport >= 5) { // report every 5%
        this.#lastReport = percent;
        process.stderr.write(`\rProgress: ${percent}% (${this.#processed} / ${this.#total} bytes)`);
      }
    }

    callback(null, chunk); // forward the data untouched
  }

  _flush(callback) {
    process.stderr.write('\rProgress: 100% — Complete!\n');
    callback();
  }
}
55

56
// 数据过滤转换流
57
/**
 * Object-mode Transform that forwards only the objects accepted by a predicate.
 *
 * @param {(obj: any) => boolean} predicate - Called once per object; a truthy
 *   result keeps the object. If it throws, the error is reported through the
 *   stream (an 'error' event / pipeline rejection).
 */
class JSONFilter extends Transform {
  #predicate;

  constructor(predicate) {
    super({ objectMode: true });
    this.#predicate = predicate;
  }

  _transform(obj, encoding, callback) {
    let keep;
    try {
      keep = this.#predicate(obj);
    } catch (err) {
      // Route predicate failures through the stream instead of letting them
      // escape from write().
      callback(err);
      return;
    }

    if (keep) {
      this.push(obj);
    }
    callback();
  }
}

Duplex Stream（双工流）#

双工流同时可读可写，但读写之间可以是独立的：

1
import { Duplex } from 'stream';
2

3
/**
 * Duplex stream whose readable side echoes, upper-cased, whatever is written
 * to its writable side.
 *
 * Written chunks are queued and handed to the readable side whenever it has
 * asked for data. Once the writable side is ended and the queue is drained,
 * the readable side ends as well.
 */
class EchoStream extends Duplex {
  #queue = [];
  #wantsData = false; // true while the readable side is waiting for a push()
  #inputEnded = false;
  #eofSent = false;

  _write(chunk, encoding, callback) {
    // Writable side: transform the data and queue it for the readable side.
    this.#queue.push(chunk.toString().toUpperCase());
    this.#drain();
    callback();
  }

  _final(callback) {
    // No more input will arrive; let the readable side finish after the queue empties.
    this.#inputEnded = true;
    this.#drain();
    callback();
  }

  _read(size) {
    // Remember the request rather than polling with a timer.
    this.#wantsData = true;
    this.#drain();
  }

  // Moves queued data to the readable side while it accepts more, then signals EOF if due.
  #drain() {
    while (this.#wantsData && this.#queue.length > 0) {
      // push() returns false once the readable buffer is full: stop until the next _read().
      this.#wantsData = this.push(this.#queue.shift());
    }

    if (this.#inputEnded && this.#queue.length === 0 && !this.#eofSent) {
      this.#eofSent = true;
      this.push(null);
    }
  }
}

背压（Backpressure）#

背压是流系统中最关键的概念。当数据生产速度超过消费速度时，如果没有背压机制，数据会在内存中无限堆积。

背压问题演示#

1
import { createReadStream, createWriteStream } from 'fs';
2

3
const readable = createReadStream('./huge-file.dat'); // 从 SSD 读取，非常快
4
const writable = createWriteStream('./output.dat');    // 写入网络挂载的磁盘，很慢
5

6
// ❌ 错误做法：不处理背压
7
readable.on('data', (chunk) => {
8
  writable.write(chunk);
9
  // write() 返回 false 时说明内部缓冲区满了
10
  // 但我们忽略了返回值，继续狂写
11
  // → 内存不断增长 → OOM
12
});

手动处理背压#

1
// ✅ 正确做法：手动处理背压
2
readable.on('data', (chunk) => {
3
  const canContinue = writable.write(chunk);
4

5
  if (!canContinue) {
6
    // 内部缓冲区满了，暂停读取
7
    readable.pause();
8

9
    // 等缓冲区排空后再继续
10
    writable.once('drain', () => {
11
      readable.resume();
12
    });
13
  }
14
});
15

16
readable.on('end', () => {
17
  writable.end();
18
});

背压原理深入#

每个流都有一个内部缓冲区，大小由 highWaterMark 控制（字节模式默认 16KB，Node.js 22 起提高到 64KB；objectMode 默认 16 个对象；fs.createReadStream 默认 64KB）。

1
生产者 (Readable)          消费者 (Writable)
2
  │                           │
3
  │  chunk ──────────────►  内部缓冲区
4
  │  chunk ──────────────►  [####............]  ← highWaterMark
5
  │  chunk ──────────────►  [########........]
6
  │  chunk ──────────────►  [############....]
7
  │  chunk ──────────────►  [################]  ← 缓冲区满！
8
  │                           │
9
  │  write() returns false    │
10
  │  ← 暂停生产               │
11
  │                           │  → 慢慢消费...
12
  │                           │  [########....]
13
  │                           │  [............]  ← 缓冲区清空
14
  │                           │
15
  │  'drain' event ─────────► │
16
  │  → 恢复生产               │

highWaterMark 不是硬限制——即使超过了，write() 仍然能成功，数据只是堆在内存里。但它是一个“水位标记”，告诉你何时该暂停。

pipeline()：正确连接流的方式#

手动处理背压容易出错，Node.js 提供了 pipeline() 函数来自动处理：

1
import { pipeline } from 'stream/promises';
2
import { createReadStream, createWriteStream } from 'fs';
3
import { createGzip } from 'zlib';
4

5
// pipeline 自动处理：
6
// 1. 背压
7
// 2. 错误传播
8
// 3. 流的清理（关闭/销毁）
9
async function compressFile(input, output) {
10
  await pipeline(
11
    createReadStream(input),
12
    createGzip(),
13
    createWriteStream(output)
14
  );
15
  console.log('Compression complete');
16
}
17

18
compressFile('./huge-file.log', './huge-file.log.gz')
19
  .catch(console.error);

pipeline vs pipe#

1
// ❌ pipe 的问题：
2
// 1. 不会自动传播错误
3
// 2. 错误时不会自动清理流
4
// 3. 需要手动给每个流添加 error handler
5
readable
6
  .pipe(transform1)
7
  .pipe(transform2)
8
  .pipe(writable);
9
// 如果 transform1 出错，readable 和 writable 不会被清理
10
// → 文件描述符泄漏
11

12
// ✅ pipeline 解决所有问题
13
import { pipeline } from 'stream';
14

15
pipeline(
16
  readable,
17
  transform1,
18
  transform2,
19
  writable,
20
  (err) => {
21
    if (err) {
22
      console.error('Pipeline failed:', err);
23
    } else {
24
      console.log('Pipeline succeeded');
25
    }
26
    // 所有流都已正确清理
27
  }
28
);
29

30
// 或者 Promise 版本
31
import { pipeline as pipelineAsync } from 'stream/promises';
32

33
try {
34
  await pipelineAsync(readable, transform1, transform2, writable);
35
} catch (err) {
36
  console.error('Pipeline failed:', err);
37
}

实战：大文件处理#

场景一：大 CSV 文件统计分析#

1
import { createReadStream } from 'fs';
2
import { pipeline } from 'stream/promises';
3
import { Transform, Writable } from 'stream';
4

5
// 行分割
6
/**
 * Transform that splits incoming bytes into lines.
 *
 * Each line that is not blank (whitespace only) is emitted as a string in
 * object mode, without its trailing '\n'. An unterminated last line is
 * emitted when the input ends.
 */
class LineParser extends Transform {
  #buffer = '';
  // Streaming decoder so a UTF-8 character split across chunks is decoded correctly.
  #decoder = new TextDecoder('utf-8');

  constructor() {
    super({ readableObjectMode: true });
  }

  _transform(chunk, encoding, callback) {
    this.#buffer += this.#decoder.decode(chunk, { stream: true });
    const lines = this.#buffer.split('\n');
    this.#buffer = lines.pop(); // incomplete line: wait for the next chunk

    for (const line of lines) {
      if (line.trim()) this.push(line);
    }
    callback();
  }

  _flush(callback) {
    const tail = this.#buffer + this.#decoder.decode();
    this.#buffer = '';
    if (tail.trim()) this.push(tail);
    callback();
  }
}
28

29
// CSV 解析（简化版）
30
class CSVParser extends Transform {
31
  #headers = null;
32

33
  constructor() {
34
    super({ objectMode: true });
35
  }
36

37
  _transform(line, encoding, callback) {
38
    const values = line.split(',').map(v => v.trim().replace(/^"|"$/g, ''));
39

40
    if (!this.#headers) {
41
      this.#headers = values;
42
    } else {
43
      const record = {};
44
      this.#headers.forEach((h, i) => { record[h] = values[i]; });
45
      this.push(record);
46
    }
47
    callback();
48
  }
49
}
50

51
// 统计聚合
52
/**
 * Object-mode Writable that accumulates statistics over user records and
 * prints a summary when the stream finishes.
 *
 * Reads `record.age` and `record.city`. Records whose age is missing, blank or
 * not a finite number still count towards `total` and the city tally, but are
 * excluded from the age statistics.
 */
class Aggregator extends Writable {
  stats = {
    total: 0,
    ageCount: 0, // number of records that carried a usable age
    sumAge: 0,
    cities: new Map(),
    minAge: Infinity,
    maxAge: -Infinity
  };

  constructor() {
    super({ objectMode: true });
  }

  _write(record, encoding, callback) {
    const { stats } = this;
    stats.total += 1;

    // Number('') and Number(null) are 0, so blank/missing ages must be rejected explicitly.
    const rawAge = typeof record.age === 'string' ? record.age.trim() : record.age;
    const age = rawAge === '' || rawAge == null ? NaN : Number(rawAge);
    if (Number.isFinite(age)) {
      stats.ageCount += 1;
      stats.sumAge += age;
      stats.minAge = Math.min(stats.minAge, age);
      stats.maxAge = Math.max(stats.maxAge, age);
    }

    const city = record.city || 'Unknown';
    stats.cities.set(city, (stats.cities.get(city) || 0) + 1);

    callback();
  }

  _final(callback) {
    const { stats } = this;
    const hasAges = stats.ageCount > 0;

    console.log('\n=== Statistics ===');
    console.log(`Total records: ${stats.total}`);
    // Average over records that actually had an age, not over all records.
    console.log(`Average age: ${hasAges ? (stats.sumAge / stats.ageCount).toFixed(1) : 'n/a'}`);
    console.log(`Age range: ${hasAges ? `${stats.minAge} - ${stats.maxAge}` : 'n/a'}`);
    console.log('\nTop cities:');

    const topCities = [...stats.cities.entries()]
      .sort((a, b) => b[1] - a[1])
      .slice(0, 10);

    for (const [city, count] of topCities) {
      console.log(`  ${city}: ${count}`);
    }

    callback();
  }
}
99

100
// 运行
101
await pipeline(
102
  createReadStream('./users.csv', { highWaterMark: 256 * 1024 }),
103
  new LineParser(),
104
  new CSVParser(),
105
  new Aggregator()
106
);

场景二：日志文件实时处理#

1
import { createReadStream, createWriteStream, statSync } from 'fs';
2
import { pipeline } from 'stream/promises';
3
import { Transform } from 'stream';
4

5
// JSON 日志解析
6
/**
 * Transform that parses newline-delimited JSON log bytes into objects.
 *
 * Each line is parsed with JSON.parse and emitted in object mode. Blank lines
 * and lines that are not valid JSON are skipped on purpose (best-effort log
 * processing). A literal `null` line is skipped too, because pushing `null`
 * would signal end-of-stream.
 */
class JSONLogParser extends Transform {
  #buffer = '';
  // Streaming decoder keeps multi-byte characters intact across chunk boundaries.
  #decoder = new TextDecoder('utf-8');

  constructor() {
    super({ readableObjectMode: true });
  }

  _transform(chunk, encoding, callback) {
    this.#buffer += this.#decoder.decode(chunk, { stream: true });
    const lines = this.#buffer.split('\n');
    this.#buffer = lines.pop(); // incomplete line stays buffered

    for (const line of lines) {
      this.#emit(line);
    }
    callback();
  }

  _flush(callback) {
    const tail = this.#buffer + this.#decoder.decode();
    this.#buffer = '';
    this.#emit(tail);
    callback();
  }

  // Parses one line and pushes the result when it is a usable JSON value.
  #emit(line) {
    if (!line.trim()) return;

    let entry;
    try {
      entry = JSON.parse(line);
    } catch {
      return; // skip invalid JSON lines
    }

    // push(null) means EOF to the stream machinery, so never forward a JSON null.
    if (entry !== null) this.push(entry);
  }
}
38

39
// 错误日志过滤
40
class ErrorFilter extends Transform {
41
  constructor() {
42
    super({ objectMode: true });
43
  }
44

45
  _transform(log, encoding, callback) {
46
    if (log.level === 'error' || log.level === 'fatal') {
47
      this.push(log);
48
    }
49
    callback();
50
  }
51
}
52

53
// 格式化输出
54
/**
 * Object-mode Transform that renders a parsed log entry as a text block.
 *
 * Output shape:
 *   [timestamp] [LEVEL] message
 *     Stack: ...      (only when log.stack is set)
 *     Context: {...}  (only when log.context is set)
 *   ---
 *
 * Missing fields fall back to the current time, 'UNKNOWN' and 'No message'.
 */
class ErrorFormatter extends Transform {
  constructor() {
    // objectMode already covers both sides of the stream.
    super({ objectMode: true });
  }

  _transform(log, encoding, callback) {
    const timestamp = log.timestamp || new Date().toISOString();
    // String() so numeric levels do not break toUpperCase().
    const level = String(log.level || 'UNKNOWN').toUpperCase();
    const message = log.message || 'No message';

    // Built piecewise so absent optional sections leave no trailing spaces.
    let entry = `[${timestamp}] [${level}] ${message}`;
    if (log.stack) {
      entry += `\n  Stack: ${log.stack}`;
    }
    if (log.context) {
      entry += `\n  Context: ${JSON.stringify(log.context)}`;
    }

    callback(null, `${entry}\n---\n`);
  }
}
73

74
// 带进度的处理
75
const fileSize = statSync('./app.log').size;
76

77
class Progress extends Transform {
78
  #processed = 0;
79

80
  constructor(total) {
81
    super();
82
    this.total = total;
83
  }
84

85
  _transform(chunk, encoding, callback) {
86
    this.#processed += chunk.length;
87
    const pct = ((this.#processed / this.total) * 100).toFixed(1);
88
    process.stderr.write(`\rProcessing: ${pct}%`);
89
    this.push(chunk);
90
    callback();
91
  }
92

93
  _flush(callback) {
94
    process.stderr.write('\rProcessing: 100% — Done!\n');
95
    callback();
96
  }
97
}
98

99
await pipeline(
100
  createReadStream('./app.log'),
101
  new Progress(fileSize),
102
  new JSONLogParser(),
103
  new ErrorFilter(),
104
  new ErrorFormatter(),
105
  createWriteStream('./errors.log')
106
);
107

108
console.log('Error extraction complete');

场景三：HTTP 文件上传流式处理#

1
import { randomUUID } from 'crypto';
import { createWriteStream } from 'fs';
import { rm } from 'fs/promises';
import { createServer } from 'http';
import { Transform } from 'stream';
import { pipeline } from 'stream/promises';
import { createGunzip } from 'zlib';
7

8
// 限速转换流
9
/**
 * Pass-through Transform that throttles throughput to a byte budget per time window.
 *
 * A chunk that would overflow the current window's budget is held back until
 * the window ends and is then charged to the new window. A chunk arriving in
 * an empty window always passes, even if it is larger than the whole budget,
 * so oversized chunks cannot stall the stream.
 *
 * @param {number} bytesPerSecond - Byte budget per window.
 * @param {number} [windowMs=1000] - Window length in milliseconds.
 */
class RateLimiter extends Transform {
  #bytesPerWindow;
  #windowMs;
  #bytesThisWindow = 0;
  #windowStart = Date.now();
  #timer = null;

  constructor(bytesPerSecond, windowMs = 1000) {
    super();
    this.#bytesPerWindow = bytesPerSecond;
    this.#windowMs = windowMs;
  }

  _transform(chunk, encoding, callback) {
    const now = Date.now();
    if (now - this.#windowStart >= this.#windowMs) {
      this.#bytesThisWindow = 0;
      this.#windowStart = now;
    }

    const overBudget = this.#bytesThisWindow > 0 &&
      this.#bytesThisWindow + chunk.length > this.#bytesPerWindow;

    if (!overBudget) {
      this.#bytesThisWindow += chunk.length;
      callback(null, chunk);
      return;
    }

    // Wait for the current window to end, then open a new one with this chunk.
    const delay = this.#windowMs - (now - this.#windowStart);
    this.#timer = setTimeout(() => {
      this.#timer = null;
      this.#windowStart = Date.now();
      this.#bytesThisWindow = chunk.length;
      callback(null, chunk);
    }, delay);
  }

  _destroy(err, callback) {
    // Drop a pending delayed chunk so a destroyed stream does not keep a timer alive.
    if (this.#timer !== null) {
      clearTimeout(this.#timer);
      this.#timer = null;
    }
    callback(err);
  }
}
40

41
// 大小限制转换流
42
/**
 * Pass-through Transform that fails the stream once more than `maxBytes`
 * bytes have flowed through it.
 *
 * The error carries `code === 'ERR_UPLOAD_TOO_LARGE'` so callers can tell a
 * size violation apart from other failures.
 */
class SizeLimiter extends Transform {
  #maxBytes;
  #received = 0;

  constructor(maxBytes) {
    super();
    this.#maxBytes = maxBytes;
  }

  _transform(chunk, encoding, callback) {
    this.#received += chunk.length;

    if (this.#received > this.#maxBytes) {
      const err = new Error(`Upload exceeds maximum size of ${this.#maxBytes} bytes`);
      err.code = 'ERR_UPLOAD_TOO_LARGE';
      callback(err);
      return;
    }

    callback(null, chunk);
  }
}

const MAX_UPLOAD_BYTES = 100 * 1024 * 1024; // 100MB limit

// Sends a JSON response unless one was already started or the connection is gone.
function sendJSON(res, status, payload) {
  if (res.headersSent || res.destroyed) return;
  res.writeHead(status, { 'Content-Type': 'application/json' });
  res.end(JSON.stringify(payload));
}

// Maps a pipeline failure to an HTTP status and a message that is safe to expose.
function describeUploadFailure(err) {
  if (err?.code === 'ERR_UPLOAD_TOO_LARGE') {
    return { status: 413, error: err.message };
  }
  if (typeof err?.code === 'string' && err.code.startsWith('Z_')) {
    return { status: 400, error: 'Invalid gzip payload' };
  }
  return { status: 500, error: 'Upload failed' };
}

/**
 * Upload endpoint: POST /upload streams the request body to ./uploads/<uuid>.dat,
 * transparently gunzipping when Content-Encoding is gzip.
 */
const server = createServer(async (req, res) => {
  if (req.method !== 'POST' || req.url !== '/upload') {
    res.writeHead(404);
    res.end('Not Found');
    return;
  }

  // Reject early when the client already declares an oversized body.
  if (Number(req.headers['content-length']) > MAX_UPLOAD_BYTES) {
    sendJSON(res, 413, { error: `Upload exceeds maximum size of ${MAX_UPLOAD_BYTES} bytes` });
    return;
  }

  const fileId = randomUUID();
  const filePath = `./uploads/${fileId}.dat`;
  const isGzipped = req.headers['content-encoding'] === 'gzip';

  const streams = [
    req,
    new SizeLimiter(MAX_UPLOAD_BYTES),
  ];

  if (isGzipped) {
    // Limit the decompressed size as well; a small gzip body can expand enormously.
    streams.push(createGunzip(), new SizeLimiter(MAX_UPLOAD_BYTES));
  }

  streams.push(createWriteStream(filePath));

  try {
    await pipeline(...streams);
    sendJSON(res, 200, { fileId, path: filePath });
  } catch (err) {
    // Remove the partially written file; failing to do so must not mask the original error.
    await rm(filePath, { force: true }).catch((cleanupErr) => {
      console.error('Failed to remove partial upload:', cleanupErr);
    });

    const { status, error } = describeUploadFailure(err);
    if (status === 500) {
      console.error('Upload failed:', err);
    }
    // NOTE(review): pipeline() destroys `req` on failure, which may tear down the
    // socket before this response is delivered — confirm on the target Node version.
    sendJSON(res, status, { error });
  }
});
95

96
server.listen(3000, () => {
97
  console.log('Upload server running on :3000');
98
});

流的组合与复用#

compose() 函数#

Node.js 提供了 stream.compose()（v16.9.0 引入，目前仍标记为实验性 API）来将多个 Transform 组合成一个：

1
import { compose } from 'stream';
2

3
// 将多个 Transform 组合成一个可复用的流
4
// JSONLogParser splits the raw bytes on newlines itself, so it must be the first
// stage. Putting LineParser in front of it would strip the newlines and leave
// JSONLogParser with one unparseable concatenated blob.
const logProcessor = compose(
  new JSONLogParser(),
  new ErrorFilter(),
  new ErrorFormatter()
);
10

11
// 可以像单个 Transform 一样使用
12
await pipeline(
13
  createReadStream('./app.log'),
14
  logProcessor,
15
  createWriteStream('./errors.log')
16
);

自定义流工具库#

1
import { Transform } from 'stream';
2

3
// 工厂函数创建 Transform
4
/**
 * Builds an object-mode Transform from plain functions.
 *
 * @param {(chunk: any) => any} transformFn - Maps one input chunk to one output
 *   value. Returning `undefined` or `null` drops the chunk. May return a
 *   Promise, in which case the resolved value is used.
 * @param {(() => any) | null} [flushFn=null] - Optional; called once when the
 *   input ends. Its result is emitted under the same rules as `transformFn`.
 * @returns {Transform} A stream that reports thrown errors and rejected
 *   promises as stream errors.
 */
function createTransform(transformFn, flushFn = null) {
  // Emits a produced value (unless it is null/undefined) and signals completion.
  const settle = (stream, value, callback) => {
    if (value !== undefined && value !== null) {
      stream.push(value);
    }
    callback();
  };

  // Runs a user function and handles both synchronous and Promise results.
  const run = (stream, fn, args, callback) => {
    let result;
    try {
      result = fn(...args);
    } catch (err) {
      callback(err);
      return;
    }

    if (typeof result?.then === 'function') {
      result.then((value) => settle(stream, value, callback), callback);
    } else {
      settle(stream, result, callback);
    }
  };

  return new Transform({
    objectMode: true,
    transform(chunk, encoding, callback) {
      run(this, transformFn, [chunk], callback);
    },
    flush(callback) {
      if (!flushFn) {
        callback();
        return;
      }
      run(this, flushFn, [], callback);
    }
  });
}
31

32
// 使用
33
const toUpperCase = () => createTransform(s => s.toString().toUpperCase());
34
const addLineNumbers = () => {
35
  let n = 0;
36
  return createTransform(line => `${++n}: ${line}`);
37
};
38
const filterEmpty = () => createTransform(line => line.trim() ? line : undefined);
39

40
await pipeline(
41
  createReadStream('./input.txt'),
42
  new LineParser(),
43
  filterEmpty(),
44
  toUpperCase(),
45
  addLineNumbers(),
46
  createTransform(line => line + '\n'),
47
  createWriteStream('./output.txt')
48
);

性能优化#

highWaterMark 调优#

1
// fs.createReadStream 的 highWaterMark 默认为 64KB，对于大文件处理可以增大
2
const readable = createReadStream('./huge.dat', {
3
  highWaterMark: 1024 * 1024 // 1MB — 减少 read 系统调用次数
4
});
5

6
// 对于对象模式，highWaterMark 是对象数量
7
const objectStream = new Transform({
8
  objectMode: true,
9
  highWaterMark: 1000 // 缓冲 1000 个对象
10
});

避免在 Transform 中进行同步阻塞#

1
// ❌ 阻塞事件循环
2
class BadTransform extends Transform {
3
  _transform(chunk, encoding, callback) {
4
    const result = heavyCPUWork(chunk); // 同步阻塞！
5
    this.push(result);
6
    callback();
7
  }
8
}
9

10
// ✅ 使用 setImmediate 让出事件循环
11
/**
 * Transform that defers each chunk's CPU-heavy work to a later event-loop
 * turn via setImmediate, so pending I/O callbacks can run between chunks.
 * The work itself still runs on the main thread.
 */
class GoodTransform extends Transform {
  _transform(chunk, encoding, callback) {
    setImmediate(() => {
      let result;
      try {
        result = heavyCPUWork(chunk);
      } catch (err) {
        // A throw inside setImmediate would otherwise be an uncaught exception.
        callback(err);
        return;
      }
      this.push(result);
      callback();
    });
  }
}
20

21
// ✅✅ 更好：使用 Worker Threads
22
import { Worker, isMainThread, parentPort, workerData } from 'worker_threads';
23

24
/**
 * Transform that hands each chunk (as a string) to a fresh worker thread
 * running './processor-worker.js' and emits the first message it posts back.
 *
 * The chunk is completed exactly once: by the first message, by a worker
 * error, or — if the worker exits without posting anything — by an error
 * describing the premature exit.
 */
class WorkerTransform extends Transform {
  _transform(chunk, encoding, callback) {
    const worker = new Worker('./processor-worker.js', {
      workerData: chunk.toString()
    });

    // The stream callback must run once only, whichever event arrives first.
    let settled = false;
    const finish = (err, result) => {
      if (settled) return;
      settled = true;
      callback(err, result);
    };

    worker.once('message', (result) => finish(null, result));
    worker.once('error', (err) => finish(err));
    // Without this the stream would hang if the worker ends without answering.
    worker.once('exit', (code) => {
      finish(new Error(`Worker exited with code ${code} before returning a result`));
    });
  }
}

内存监控#

1
import { Transform } from 'stream';
2

3
/**
 * Pass-through Transform that logs process memory usage (RSS and heap) every
 * two seconds for as long as the stream is alive.
 *
 * The timer is stopped when the input ends and also when the stream is
 * destroyed (for example by a failing pipeline). It is unref'ed so that it
 * never keeps the process running on its own.
 */
class MemoryMonitor extends Transform {
  #interval;

  constructor() {
    super();
    this.#interval = setInterval(() => {
      const usage = process.memoryUsage();
      const toMB = (bytes) => (bytes / 1024 / 1024).toFixed(1);
      console.log(`Memory — RSS: ${toMB(usage.rss)}MB, ` +
        `Heap: ${toMB(usage.heapUsed)}MB / ` +
        `${toMB(usage.heapTotal)}MB`);
    }, 2000);
    this.#interval.unref();
  }

  _transform(chunk, encoding, callback) {
    callback(null, chunk); // forward data untouched
  }

  _flush(callback) {
    clearInterval(this.#interval);
    callback();
  }

  _destroy(err, callback) {
    // Covers early termination, where _flush is never reached.
    clearInterval(this.#interval);
    callback(err);
  }
}

Web Streams API#

Node.js 18+ 也支持 Web Streams API（与浏览器兼容）：

1
// Web Streams API
2
const readableStream = new ReadableStream({
3
  start(controller) {
4
    controller.enqueue('Hello');
5
    controller.enqueue('World');
6
    controller.close();
7
  }
8
});
9

10
const transformStream = new TransformStream({
11
  transform(chunk, controller) {
12
    controller.enqueue(chunk.toUpperCase());
13
  }
14
});
15

16
const writableStream = new WritableStream({
17
  write(chunk) {
18
    console.log(chunk);
19
  }
20
});
21

22
// 管道连接
23
await readableStream
24
  .pipeThrough(transformStream)
25
  .pipeTo(writableStream);
26

27
// Node.js 流与 Web Streams 互转
28
import { Readable } from 'stream';
29

30
const nodeStream = Readable.fromWeb(readableStream);
31
const webStream = Readable.toWeb(nodeReadableStream);

总结#

Node.js Streams 是处理大量数据的核心基础设施：

四种流类型各有分工：Readable 产出、Writable 消费、Transform 变换、Duplex 双向
背压机制是流系统的生命线——永远不要忽略 write() 的返回值
pipeline() 是连接流的最佳实践——自动处理背压、错误传播和资源清理
highWaterMark 是性能调优的关键旋钮
组合与复用让流处理管道可以像乐高一样拼装

掌握 Streams，你就掌握了 Node.js 处理海量数据的钥匙。

音乐

音乐

前言#