Date: Fri, 25 Jan 2008 17:53:49 +0100 (CET) From: Stefan Richter Subject: firewire: fw-core: react on bus resets while the config ROM is being fetched read_rom() obtained a fresh new fw_device.generation for each read transaction. Hence it was able to continue reading in the middle of the ROM even if a bus reset happened. However the device may have modified the ROM during the reset. We would end up with a corrupt fetched ROM image then. Although all of this is quite unlikely, it is not impossible. Therefore we now restart reading the ROM if the bus generation changed. Note, the memory barrier in read_rom() is still necessary according to tests by Jarod Wilson, despite of the ->generation access being moved up in the call chain. Signed-off-by: Stefan Richter This is essentially what I've been beating on locally, and I've yet to hit another config rom read failure with it. Signed-off-by: Jarod Wilson --- drivers/firewire/fw-device.c | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) Index: linux-2.6.24/drivers/firewire/fw-device.c =================================================================== --- linux-2.6.24.orig/drivers/firewire/fw-device.c +++ linux-2.6.24/drivers/firewire/fw-device.c @@ -389,12 +389,12 @@ complete_transaction(struct fw_card *car complete(&callback_data->done); } -static int read_rom(struct fw_device *device, int index, u32 * data) +static int +read_rom(struct fw_device *device, int generation, int index, u32 *data) { struct read_quadlet_callback_data callback_data; struct fw_transaction t; u64 offset; - int generation = device->generation; /* device->node_id, accessed below, must not be older than generation */ smp_rmb(); @@ -413,7 +413,14 @@ static int read_rom(struct fw_device *de return callback_data.rcode; } -static int read_bus_info_block(struct fw_device *device) +/* + * Read the bus info block, perform a speed probe, and read all of the rest of + * the config ROM. We do all this with a cached bus generation. If the bus + * generation changes under us, read_bus_info_block will fail and get retried. + * It's better to start all over in this case because the node from which we + * are reading the ROM may have changed the ROM during the reset. + */ +static int read_bus_info_block(struct fw_device *device, int generation) { static u32 rom[256]; u32 stack[16], sp, key; @@ -423,7 +430,7 @@ static int read_bus_info_block(struct fw /* First read the bus info block. */ for (i = 0; i < 5; i++) { - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE) return -1; /* * As per IEEE1212 7.2, during power-up, devices can @@ -458,7 +465,8 @@ static int read_bus_info_block(struct fw device->max_speed = device->card->link_speed; while (device->max_speed > SCODE_100) { - if (read_rom(device, 0, &dummy) == RCODE_COMPLETE) + if (read_rom(device, generation, 0, &dummy) == + RCODE_COMPLETE) break; device->max_speed--; } @@ -491,7 +499,7 @@ static int read_bus_info_block(struct fw return -1; /* Read header quadlet for the block to get the length. */ - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != RCODE_COMPLETE) return -1; end = i + (rom[i] >> 16) + 1; i++; @@ -510,7 +518,8 @@ static int read_bus_info_block(struct fw * it references another block, and push it in that case. */ while (i < end) { - if (read_rom(device, i, &rom[i]) != RCODE_COMPLETE) + if (read_rom(device, generation, i, &rom[i]) != + RCODE_COMPLETE) return -1; if ((key >> 30) == 3 && (rom[i] >> 30) > 1 && sp < ARRAY_SIZE(stack)) @@ -657,7 +666,7 @@ static void fw_device_init(struct work_s * device. */ - if (read_bus_info_block(device) < 0) { + if (read_bus_info_block(device, device->generation) < 0) { if (device->config_rom_retries < MAX_RETRIES) { device->config_rom_retries++; schedule_delayed_work(&device->work, RETRY_DELAY);