调试练习:诊断间歇性测试失败
场景描述
你接手了一个异步测试套件,测试有时通过有时失败——这是典型的"间歇性测试"问题。你需要使用 systematic-debugging 技能诊断并修复这个问题。
问题现象
$ npm test
✓ should connect to server (5ms)
✓ should receive messages (12ms)
✗ should handle disconnect
Error: Expected connection state to be 'disconnected' but got 'connected'
53 passing
1 failing
重新运行:
$ npm test
✓ should connect to server (5ms)
✗ should receive messages
Error: Timeout exceeded waiting for message
53 passing
1 failing
每次运行失败的测试都不同!
初始代码
WebSocket 客户端
javascript
// client.js
/**
 * Minimal WebSocket chat client: tracks a `connected` flag and stores
 * every parsed message it receives.
 */
class ChatClient {
  /**
   * @param {string} url - Endpoint handed to `new WebSocket(url)` by connect().
   */
  constructor(url) {
    this.url = url;
    this.socket = null;
    this.messages = [];
    this.connected = false;
  }

  /**
   * Creates the socket and installs the open/message/close handlers.
   * Returns right away; `connected` changes only when the open event fires.
   */
  connect() {
    const socket = new WebSocket(this.url);
    this.socket = socket;

    const markConnected = () => {
      this.connected = true;
    };
    const storeMessage = (event) => {
      this.messages.push(JSON.parse(event.data));
    };
    const markDisconnected = () => {
      this.connected = false;
    };

    socket.onopen = markConnected;
    socket.onmessage = storeMessage;
    socket.onclose = markDisconnected;
  }

  /**
   * Sends `message` as JSON. Does nothing while `connected` is false.
   * @param {*} message - Any JSON-serialisable value.
   */
  send(message) {
    if (!this.connected) {
      return;
    }
    this.socket.send(JSON.stringify(message));
  }

  /**
   * Asks the socket to close, if one exists. Returns right away;
   * `connected` is cleared by the close handler, not here.
   */
  disconnect() {
    if (!this.socket) {
      return;
    }
    this.socket.close();
  }
}
module.exports = { ChatClient };
测试文件
javascript
// client.test.js
const { ChatClient } = require('./client');
const { WebSocketServer } = require('ws');
describe('ChatClient', () => {
let server;
let client;
let port;
// Before each test: start a new WebSocketServer on port 0, record the actual
// port reported by server.address(), and signal Jest via done() from the
// constructor callback.
beforeEach((done) => {
server = new WebSocketServer({ port: 0 }, () => {
port = server.address().port;
done();
});
});
// After each test: ask the client and the server to close.
// Neither call is awaited and no callback is passed, so this hook returns
// without waiting for either close to finish.
afterEach(() => {
if (client) client.disconnect();
if (server) server.close();
});
// Connects, then checks the `connected` flag after a fixed 100 ms delay.
test('should connect to server', (done) => {
client = new ChatClient(`ws://localhost:${port}`);
client.connect();
setTimeout(() => {
expect(client.connected).toBe(true);
done();
}, 100);
});
// The server greets every new connection with { text: 'Hello' }.
// Note the 'connection' handler is registered after client.connect() is called,
// and the assertions run after a fixed 100 ms delay.
test('should receive messages', (done) => {
client = new ChatClient(`ws://localhost:${port}`);
client.connect();
server.on('connection', (ws) => {
ws.send(JSON.stringify({ text: 'Hello' }));
});
setTimeout(() => {
expect(client.messages.length).toBe(1);
expect(client.messages[0].text).toBe('Hello');
done();
}, 100);
});
// Connects, waits 100 ms, calls disconnect(), waits another 100 ms, then
// expects the `connected` flag to be false.
test('should handle disconnect', (done) => {
client = new ChatClient(`ws://localhost:${port}`);
client.connect();
setTimeout(() => {
client.disconnect();
setTimeout(() => {
expect(client.connected).toBe(false);
done();
}, 100);
}, 100);
});
// The server stores the last parsed message it receives in `received`.
// The client sends after a fixed 100 ms delay and the assertions run 100 ms
// after that. send() does nothing while `connected` is still false.
test('should send messages', (done) => {
client = new ChatClient(`ws://localhost:${port}`);
client.connect();
let received = null;
server.on('connection', (ws) => {
ws.on('message', (data) => {
received = JSON.parse(data);
});
});
setTimeout(() => {
client.send({ text: 'Test message' });
setTimeout(() => {
expect(received).not.toBeNull();
expect(received.text).toBe('Test message');
done();
}, 100);
}, 100);
});
});
你的任务
使用 systematic-debugging 技能诊断问题:
- 问题确认:明确描述问题是什么
- 假设生成:列出可能导致间歇性失败的原因
- 实验设计:设计实验来验证假设
- 根因确认:找到问题的根本原因
- 修复验证:修复后验证问题不再出现
调试流程指南
第一阶段:问题确认
先回答这些问题:
- 问题的具体表现是什么?
- 什么条件下会出现?
- 频率如何?(每次?50%?偶尔?)
第二阶段:假设生成
根据代码分析可能的根因:
可能原因:
1. 时序问题:setTimeout 时间不够
2. 状态污染:beforeEach/afterEach 清理不完整
3. 资源竞争:多个测试共享资源
4. 网络延迟:WebSocket 连接建立需要时间
第三阶段:实验设计
设计实验验证每个假设:
javascript
// Experiment 1: lengthen the fixed delay and see whether the failure rate changes.
setTimeout(() => {
expect(client.connected).toBe(true);
done();
}, 500); // raised from 100 ms to 500 ms
// Experiment 2: log when the connection actually opens.
// This replaces the handler installed by client.connect(), so it must keep
// setting the flag. An arrow function takes `this` from the enclosing scope,
// which is not the client here, so the flag is written through `client`.
client.socket.onopen = () => {
  console.log('Connection opened at', Date.now());
  client.connected = true;
};
// 实验 3:检查清理状态
afterEach(() => {
console.log('Cleanup: connected =', client?.connected);
// ...
});
第四阶段:根因分析
使用"5个为什么"方法:
问题:测试间歇性失败
↓ 为什么?
答案:有时断言执行时状态还未更新
↓ 为什么?
答案:异步操作(WebSocket 连接)可能很慢
↓ 为什么?
答案:使用固定的 setTimeout 等待
↓ 为什么?
答案:没有更好的方式等待异步操作
↓ 为什么?
答案:应该使用条件等待而非固定时间
提示
点击展开提示
提示 1:竞态条件
setTimeout 是测试中最常见的竞态条件来源。不同的机器、负载下,异步操作完成时间不同。
提示 2:更好的等待方式
不要用 setTimeout,使用"条件等待":
javascript
// 条件等待函数
/**
 * Polls `condition` every 50 ms until it returns a truthy value.
 *
 * @param {() => *} condition - Checked once immediately, then after each pause.
 * @param {number} [timeout=5000] - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<void>} Resolves as soon as `condition()` is truthy.
 * @throws {Error} 'Condition not met within timeout' once more than `timeout`
 *   ms have elapsed and the condition is still falsy.
 */
async function waitFor(condition, timeout = 5000) {
  const startedAt = Date.now();
  const pause = (ms) => new Promise((wake) => setTimeout(wake, ms));

  for (;;) {
    if (condition()) {
      return;
    }
    if (Date.now() - startedAt > timeout) {
      throw new Error('Condition not met within timeout');
    }
    await pause(50);
  }
}
// 使用
await waitFor(() => client.connected);
提示 3:事件驱动的等待
对于 WebSocket,更优雅的方式是监听事件:
javascript
/**
 * Returns a promise that settles when the client's socket finishes connecting.
 * Call it right after client.connect(), while client.socket exists.
 *
 * @param {{ socket: object, connected: boolean }} client - Client whose
 *   `socket` has already been created.
 * @returns {Promise<void>} Resolves once the socket is open (immediately if it
 *   already is); rejects with the error event if the connection fails.
 */
function connectAsync(client) {
  return new Promise((resolve, reject) => {
    const { socket } = client;
    const markOpen = () => {
      client.connected = true;
      resolve();
    };

    // readyState 1 is OPEN: the open event has already fired and will not
    // fire again, so waiting for it would hang forever.
    if (socket.readyState === 1) {
      markOpen();
      return;
    }

    socket.onopen = markOpen;

    // Surface connection failures instead of leaving the promise pending.
    // Any error handler that was already installed is still called.
    const previousOnError = socket.onerror;
    socket.onerror = (event) => {
      if (typeof previousOnError === 'function') {
        previousOnError.call(socket, event);
      }
      reject(event);
    };
  });
}
// 使用
await connectAsync(client);
expect(client.connected).toBe(true);
提示 4:检查 afterEach
确保每个测试后完全清理状态:
javascript
afterEach(async () => {
// Close the client first, then the server.
// NOTE(review): `await` only waits here if disconnect() returns a promise.
// The ChatClient.disconnect() shown in the initial code calls socket.close()
// and returns nothing, so this line would not wait for the close event.
if (client) {
await client.disconnect();
client = null;
}
// Wrap the callback-style server.close() in a promise so the hook waits for it.
if (server) {
await new Promise(resolve => server.close(resolve));
server = null;
}
});
参考答案
点击展开参考答案
根因
测试使用了固定的 setTimeout 等待异步操作完成,但异步操作的实际完成时间不确定。这是典型的"竞态条件"问题。
修复方案
方案 1:使用条件等待
javascript
// helpers.js
/**
 * Polls `predicate` every 50 ms and settles when it passes or time runs out.
 *
 * @param {() => *} predicate - Checked once synchronously, then on each poll.
 * @param {number} [timeout=5000] - Maximum time to keep polling, in milliseconds.
 * @returns {Promise<void>} Resolves when `predicate()` is truthy; rejects with
 *   Error('Timeout waiting for condition') once more than `timeout` ms have
 *   elapsed and the predicate is still falsy.
 */
function waitFor(predicate, timeout = 5000) {
  const startedAt = Date.now();

  return new Promise((resolve, reject) => {
    const poll = () => {
      if (predicate()) {
        resolve();
        return;
      }
      if (Date.now() - startedAt > timeout) {
        reject(new Error('Timeout waiting for condition'));
        return;
      }
      setTimeout(poll, 50);
    };

    poll();
  });
}
module.exports = { waitFor };
方案 2:使用 Promise 包装
javascript
// client.js - 添加 Promise API
class ChatClient {
// ... 现有代码 ...
connectAsync() {
return new Promise((resolve, reject) => {
this.socket = new WebSocket(this.url);
this.socket.onopen = () => {
this.connected = true;
resolve();
};
this.socket.onerror = (error) => {
reject(error);
};
this.socket.onmessage = (event) => {
this.messages.push(JSON.parse(event.data));
};
this.socket.onclose = () => {
this.connected = false;
};
});
}
disconnectAsync() {
return new Promise((resolve) => {
if (!this.socket) {
resolve();
return;
}
this.socket.onclose = () => {
this.connected = false;
resolve();
};
this.socket.close();
});
}
waitForMessage(timeout = 5000) {
return new Promise((resolve, reject) => {
const start = Date.now();
const check = () => {
if (this.messages.length > 0) {
resolve(this.messages[this.messages.length - 1]);
} else if (Date.now() - start > timeout) {
reject(new Error('Timeout waiting for message'));
} else {
setTimeout(check, 50);
}
};
check();
});
}
}
修复后的测试
javascript
// client.test.js
const { ChatClient } = require('./client');
const { WebSocketServer } = require('ws');
const { waitFor } = require('./helpers');
describe('ChatClient', () => {
let server;
let client;
let port;
// Start a new server on port 0 for every test and wait for its constructor
// callback before reading the port it was given.
beforeEach(async () => {
  server = await new Promise((ready) => {
    const wss = new WebSocketServer({ port: 0 }, () => {
      ready(wss);
    });
  });
  ({ port } = server.address());
});
// Tear down in order: wait for the client to close, then for the server.
afterEach(async () => {
  if (client) {
    await client.disconnectAsync();
    client = null;
  }

  if (server) {
    // server.close() takes a callback; wrap it so the hook can await it.
    await new Promise((closed) => {
      server.close(closed);
    });
    server = null;
  }
});
// The flag is checked only after connectAsync() has resolved.
test('should connect to server', async () => {
  const url = `ws://localhost:${port}`;
  client = new ChatClient(url);

  await client.connectAsync();

  expect(client.connected).toBe(true);
});
// The greeting handler is registered before the client connects.
test('should receive messages', async () => {
  const url = `ws://localhost:${port}`;
  client = new ChatClient(url);

  const greet = (ws) => {
    ws.send(JSON.stringify({ text: 'Hello' }));
  };
  server.on('connection', greet);

  await client.connectAsync();
  const { text } = await client.waitForMessage();

  expect(text).toBe('Hello');
});
// Connect, wait for the disconnect to finish, then check the flag.
test('should handle disconnect', async () => {
  const url = `ws://localhost:${port}`;
  client = new ChatClient(url);

  await client.connectAsync();
  await client.disconnectAsync();

  expect(client.connected).toBe(false);
});
// The promise for the server-side message is created before connecting, so
// the 'connection' handler is already registered when the client connects.
test('should send messages', async () => {
  const url = `ws://localhost:${port}`;
  client = new ChatClient(url);

  const firstServerMessage = new Promise((resolve) => {
    server.on('connection', (ws) => {
      ws.on('message', (raw) => {
        resolve(JSON.parse(raw));
      });
    });
  });

  await client.connectAsync();
  client.send({ text: 'Test message' });

  const { text } = await firstServerMessage;
  expect(text).toBe('Test message');
});
});
关键改进
- 移除 setTimeout:用 Promise 和条件等待替代
- 异步 API:为客户端添加 Promise 风格 API
- 正确清理:afterEach 使用 async/await 确保清理完成
- 确定性测试:测试结果不再依赖时机
学习要点
1. 时序问题诊断
间歇性失败通常是:
- 竞态条件(Race Condition)
- 固定等待时间
- 状态清理不完整
2. 条件等待 vs 固定等待
| 固定等待 | 条件等待 |
|---|---|
| `setTimeout(fn, 100)` | `waitFor(condition)` |
| 时间不确定时可能失败 | 条件满足即返回 |
| 浪费时间(等待过长) | 高效(条件满足立即返回) |
| 不稳定 | 稳定 |
3. 测试隔离
每个测试应该:
- 独立初始化状态
- 完全清理资源
- 不依赖其他测试的副作用
4. 异步测试最佳实践
javascript
// ❌ 错误:固定等待
setTimeout(() => {
expect(state).toBe('ready');
done();
}, 100);
// ✅ 正确:条件等待
await waitFor(() => state === 'ready');
expect(state).toBe('ready');
常见错误
| 错误 | 正确做法 |
|---|---|
| 使用 setTimeout 固定等待 | 使用条件等待或 Promise |
| beforeEach 不等待初始化完成 | 使用 async beforeEach |
| afterEach 不等待清理完成 | 使用 async afterEach |
| 测试之间共享状态 | 每个测试独立初始化 |
进阶练习
完成基础练习后,尝试:
- 添加重连机制:连接断开后自动重连
- 消息确认机制:等待服务器确认后才认为消息发送成功
- 连接超时处理:连接超过一定时间未建立则报错
相关技能
- systematic-debugging - 系统化调试
- test-driven-development - TDD 核心技能
- verification-before-completion - 完成前验证