spi25: Implement multi-i/o reads

We describe a read operation in a new `struct spi_read_op`. It
comprises the i/o mode, a native-4BA flag, the opcode, an optional
mode byte, and the number of dummy bytes.

Based on this information  about the various read operations, and
the flash and master feature flags,  we select the read operation
with the highest throughput.

The following assumption is made about 4BA chips: When a chip supports
native-4BA fast reads and a multi-i/o version of the regular fast
read, then it should also support the respective native-4BA, multi-
i/o version (yes, JEDEC, there are too many read commands!). So far
this seems to hold for the chips in our database.

Change-Id: I3c93e71d85f769831d637c14d3571f7ddb54d8b2
Signed-off-by: Nico Huber <nico.h@gmx.de>
Reviewed-on: https://review.sourcearcade.org/c/flashprog/+/49
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
diff --git a/dediprog.c b/dediprog.c
index db4e39f..21efa4d 100644
--- a/dediprog.c
+++ b/dediprog.c
@@ -398,7 +398,7 @@
 	if (protocol(dp_data) >= PROTOCOL_V2) {
 		if (is_read && flash->chip->feature_bits & FEATURE_4BA_FAST_READ) {
 			data_packet[3] = READ_MODE_4B_ADDR_FAST_0x0C;
-			data_packet[4] = JEDEC_READ_4BA_FAST;
+			data_packet[4] = JEDEC_FAST_READ_4BA;
 		} else if (dedi_spi_cmd == WRITE_MODE_PAGE_PGM
 			   && (flash->chip->feature_bits & FEATURE_4BA_WRITE)) {
 			if (protocol(dp_data) >= PROTOCOL_V3)
diff --git a/include/flash.h b/include/flash.h
index da73df5..f839995 100644
--- a/include/flash.h
+++ b/include/flash.h
@@ -428,6 +428,8 @@
 	void *user_data;
 };
 
+struct spi_read_op;
+
 struct flashprog_flashctx {
 	struct flashchip *chip;
 	/* FIXME: The memory mappings should be saved in a more structured way. */
@@ -458,6 +460,8 @@
            of the extended address register. */
 	int address_high_byte;
 	bool in_4ba_mode;
+	/* For SPI flash chips, we dynamically select the fast-read operation. */
+	const struct spi_read_op *spi_fast_read;
 
 	int chip_restore_fn_count;
 	struct chip_restore_func_data {
diff --git a/include/spi.h b/include/spi.h
index 8935997..9cf5154 100644
--- a/include/spi.h
+++ b/include/spi.h
@@ -194,9 +194,11 @@
 #define JEDEC_READ		0x03
 #define JEDEC_READ_OUTSIZE	0x04
 /*      JEDEC_READ_INSIZE : any length */
-
-/* Read the memory (with delay after sending address) */
-#define JEDEC_READ_FAST		0x0b
+#define JEDEC_FAST_READ		0x0b /* with 8 cycles delay after sending address */
+#define JEDEC_FAST_READ_DOUT	0x3b /* with 8 cycles delay and dual output */
+#define JEDEC_FAST_READ_DIO	0xbb /* with 4 cycles delay and dual i/o */
+#define JEDEC_FAST_READ_QOUT	0x6b /* with 8 cycles delay and quad output */
+#define JEDEC_FAST_READ_QIO	0xeb /* with 6 cycles delay and quad i/o */
 
 /* Write memory byte */
 #define JEDEC_BYTE_PROGRAM		0x02
@@ -211,11 +213,12 @@
 
 /* Read the memory with 4-byte address
    From ANY mode (3-bytes or 4-bytes) it works with 4-byte address */
-#define JEDEC_READ_4BA		0x13
-
-/* Read the memory with 4-byte address (and delay after sending address)
-   From ANY mode (3-bytes or 4-bytes) it works with 4-byte address */
-#define JEDEC_READ_4BA_FAST	0x0c
+#define JEDEC_READ_4BA			0x13
+#define JEDEC_FAST_READ_4BA		0x0c /* with 8 cycles delay after sending address */
+#define JEDEC_FAST_READ_DOUT_4BA	0x3c /* with 8 cycles delay and dual output */
+#define JEDEC_FAST_READ_DIO_4BA		0xbc /* with 4 cycles delay and dual i/o */
+#define JEDEC_FAST_READ_QOUT_4BA	0x6c /* with 8 cycles delay and quad output */
+#define JEDEC_FAST_READ_QIO_4BA		0xec /* with 6 cycles delay and quad i/o */
 
 /* Write memory byte with 4-byte address
    From ANY mode (3-bytes or 4-bytes) it works with 4-byte address */
diff --git a/include/spi_command.h b/include/spi_command.h
index 54dfe48..b0daeee 100644
--- a/include/spi_command.h
+++ b/include/spi_command.h
@@ -68,6 +68,15 @@
 	QPI_4_4_4,
 };
 
+/* describes properties of a read operation */
+struct spi_read_op {
+	enum io_mode io_mode;	/* i/o mode of the transfer, e.g. SINGLE_IO_1_1_1 or QUAD_IO_1_4_4 */
+	bool native_4ba;	/* true for the native 4-byte-address (_4BA) variant of an opcode */
+	uint8_t opcode;		/* command byte, e.g. JEDEC_READ or JEDEC_FAST_READ_QIO */
+	uint8_t mode_byte;	/* optional byte to send after the address, if != 0 */
+	uint8_t dummy_len;	/* dummy bytes (including optional mode byte) */
+};
+
 struct spi_command {
 	enum io_mode io_mode;
 	size_t opcode_len;	/* bytes to write in opcode i/o phase */
diff --git a/spi.c b/spi.c
index 48e86ec..de7965f 100644
--- a/spi.c
+++ b/spi.c
@@ -173,6 +173,15 @@
 		return ERROR_FLASHPROG_BUG;
 	}
 
+	if ((mst->features & (SPI_MASTER_DUAL | SPI_MASTER_QUAD | SPI_MASTER_DTR_IN)) &&
+	    mst->read == default_spi_read && mst->multicommand == default_spi_send_multicommand) {
+		msg_perr("%s called with incomplete master definition.\n"
+			 "Dual/quad I/O and DTR require multicommand or custom read function.\n"
+			 "Please report a bug at flashprog@flashprog.org\n",
+			 __func__);
+		return ERROR_FLASHPROG_BUG;
+	}
+
 	if (max_rom_decode)
 		rmst.max_rom_decode = max_rom_decode;
 	else
diff --git a/spi25.c b/spi25.c
index 30ccc30..edcaf48 100644
--- a/spi25.c
+++ b/spi25.c
@@ -644,17 +644,48 @@
 	return spi_write_cmd(flash, op, native_4ba, addr, bytes, len, 10);
 }
 
+static const struct spi_read_op *get_spi_read_op(const struct flashctx *flash)
+{
+	static const struct spi_read_op sio_read = { SINGLE_IO_1_1_1, false, JEDEC_READ, 0x00, 0 };
+	static const struct spi_read_op sio_read_4ba = { SINGLE_IO_1_1_1, true, JEDEC_READ_4BA, 0x00, 0 };
+
+	if (flash->spi_fast_read)	/* a fast-read operation stored in the context takes precedence */
+		return flash->spi_fast_read;
+
+	if (flash->chip->feature_bits & FEATURE_4BA_READ && spi_master_4ba(flash))
+		return &sio_read_4ba;	/* plain single-i/o read with native 4-byte address */
+
+	return &sio_read;		/* fall back to the plain single-i/o JEDEC_READ */
+}
+
 int spi_nbyte_read(struct flashctx *flash, uint8_t *dst, unsigned int address, unsigned int len)
 {
-	const bool native_4ba = flash->chip->feature_bits & FEATURE_4BA_READ && spi_master_4ba(flash);
-	uint8_t cmd[1 + JEDEC_MAX_ADDR_LEN] = { native_4ba ? JEDEC_READ_4BA : JEDEC_READ, };
+	const struct spi_read_op *const read_op = get_spi_read_op(flash);
+	const size_t mode_len = read_op->mode_byte ? 1 : 0;	/* a mode byte of 0 means "none" */
+	uint8_t cmd_buf[1 + JEDEC_MAX_ADDR_LEN + 1];	/* opcode, address, optional mode byte */
 
-	const int addr_len = spi_prepare_address(flash, cmd, native_4ba, address);
+	const int addr_len = spi_prepare_address(flash, cmd_buf, read_op->native_4ba, address);
 	if (addr_len < 0)
 		return 1;
 
-	/* Send Read */
-	return spi_send_command(flash, 1 + addr_len, len, cmd, dst);
+	cmd_buf[0] = read_op->opcode;
+	cmd_buf[addr_len + 1] = read_op->mode_byte;	/* stored always; presumably only sent if .write_len is 1 */
+
+	struct spi_command cmd[] = {
+	{
+		.io_mode	= read_op->io_mode,
+		.opcode_len	= 1,
+		.address_len	= addr_len,
+		.write_len	= mode_len,
+		.high_z_len	= read_op->dummy_len - mode_len,	/* dummy_len counts the mode byte, too */
+		.read_len	= len,
+		.writearr	= cmd_buf,
+		.readarr	= dst,
+	},
+		NULL_SPI_CMD
+	};
+
+	return spi_send_multicommand(flash, cmd);
 }
 
 /*
diff --git a/spi25_prepare.c b/spi25_prepare.c
index 7482c5c..279f2e4 100644
--- a/spi25_prepare.c
+++ b/spi25_prepare.c
@@ -102,6 +102,42 @@
 	return 0;
 }
 
+static const struct spi_read_op *select_spi_fast_read(const struct flashctx *flash)
+{
+	static const struct {
+		unsigned int feature_check;
+		unsigned int master_check;
+		struct spi_read_op op;
+	#define MIO_CHECKS(flash_feature, master_feature) \
+		FEATURE_FAST_READ_##flash_feature, SPI_MASTER_##master_feature
+	} mio[] = { /*       flash  master                     4BA                              mode  dummies */
+		{ MIO_CHECKS(QIO,  QUAD_IO), { QUAD_IO_1_4_4,  true,  JEDEC_FAST_READ_QIO_4BA,  0xff, 3 } },
+		{ MIO_CHECKS(QOUT, QUAD_IN), { QUAD_OUT_1_1_4, true,  JEDEC_FAST_READ_QOUT_4BA, 0x00, 4 } },
+		{ MIO_CHECKS(DIO,  DUAL_IO), { DUAL_IO_1_2_2,  true,  JEDEC_FAST_READ_DIO_4BA,  0xff, 1 } },
+		{ MIO_CHECKS(DOUT, DUAL_IN), { DUAL_OUT_1_1_2, true,  JEDEC_FAST_READ_DOUT_4BA, 0x00, 2 } },
+		{ MIO_CHECKS(QIO,  QUAD_IO), { QUAD_IO_1_4_4,  false, JEDEC_FAST_READ_QIO,      0xff, 3 } },
+		{ MIO_CHECKS(QOUT, QUAD_IN), { QUAD_OUT_1_1_4, false, JEDEC_FAST_READ_QOUT,     0x00, 4 } },
+		{ MIO_CHECKS(DIO,  DUAL_IO), { DUAL_IO_1_2_2,  false, JEDEC_FAST_READ_DIO,      0xff, 1 } },
+		{ MIO_CHECKS(DOUT, DUAL_IN), { DUAL_OUT_1_1_2, false, JEDEC_FAST_READ_DOUT,     0x00, 2 } },
+	};
+
+	unsigned int i;
+	for (i = 0; i < ARRAY_SIZE(mio); ++i) {	/* table order is the preference: first match wins */
+		if (mio[i].op.native_4ba && !(flash->chip->feature_bits & FEATURE_4BA_FAST_READ))
+			continue;	/* native-4BA entries additionally require FEATURE_4BA_FAST_READ */
+		if ((flash->chip->feature_bits & mio[i].feature_check) != mio[i].feature_check)
+			continue;	/* chip lacks the feature bits for this read operation */
+		if ((flash->mst.spi->features & mio[i].master_check) != mio[i].master_check)
+			continue;	/* master lacks the feature bits for this i/o mode */
+		if (mio[i].op.native_4ba && !spi_master_4ba(flash))
+			continue;	/* spi_master_4ba() rejects native-4BA commands for this master */
+		if (flash->mst.spi->probe_opcode(flash, mio[i].op.opcode))
+			return &mio[i].op;	/* opcode accepted by the master's probe_opcode() hook */
+	}
+
+	return NULL;	/* no multi-i/o read operation matched */
+}
+
 int spi_prepare_io(struct flashctx *const flash, const enum preparation_steps prep)
 {
 	if (prep != PREPARE_FULL)
@@ -115,6 +151,8 @@
 	if (ret)
 		return ret;
 
+	flash->spi_fast_read = select_spi_fast_read(flash);
+
 	return 0;
 }