spi: Prepare for multi i/o and dummy bytes

Multi-i/o commands split SPI transactions into multiple phases that
can be transferred over 1, 2 or 4 wires. For this, we adapt `struct
spi_command` with a new enum, specifying the transfer mode, and
additional size fields.  While we are at it, move everything related
into a new header file `spi_command.h` so we won't further clutter
`flash.h`.

On the master side, we add respective feature flags for the multi-
i/o modes.

See also the comment in `spi_command.h` about multi-i/o commands.

Change-Id: I79debb845f1c8fec77e0556853ffb01735e73ab8
Signed-off-by: Nico Huber <nico.h@gmx.de>
Reviewed-on: https://review.sourcearcade.org/c/flashprog/+/44
Reviewed-by: Arthur Heymans <arthur@aheymans.xyz>
diff --git a/at45db.c b/at45db.c
index 58c6d10..67692d7 100644
--- a/at45db.c
+++ b/at45db.c
@@ -19,6 +19,7 @@
 #include "flash.h"
 #include "chipdrivers.h"
 #include "programmer.h"
+#include "spi_command.h"
 #include "spi.h"
 
 /* Status register bits */
diff --git a/edi.c b/edi.c
index 5b4b4c9..c1eb64a 100644
--- a/edi.c
+++ b/edi.c
@@ -17,6 +17,7 @@
 #include <string.h>
 #include "flash.h"
 #include "chipdrivers.h"
+#include "spi_command.h"
 #include "ene.h"
 #include "edi.h"
 
diff --git a/ft2232_spi.c b/ft2232_spi.c
index 889db98..a82153d 100644
--- a/ft2232_spi.c
+++ b/ft2232_spi.c
@@ -22,6 +22,7 @@
 #include <ctype.h>
 #include "flash.h"
 #include "programmer.h"
+#include "spi_command.h"
 #include "spi.h"
 #include <ftdi.h>
 
@@ -206,9 +207,9 @@
 		/* commands for CS# assertion and de-assertion: */
 		cmd_len + cmd_len
 		/* commands for either a write, a read or both: */
-		+ (cmd->writecnt && cmd->readcnt ? cmd_len + cmd_len : cmd_len)
+		+ (spi_write_len(cmd) && spi_read_len(cmd) ? cmd_len + cmd_len : cmd_len)
 		/* payload (only writecnt; readcnt concerns another buffer): */
-		+ cmd->writecnt
+		+ spi_write_len(cmd)
 		<= buffer_size;
 }
 
@@ -224,9 +225,11 @@
 	/*
 	 * Minimize FTDI-calls by packing as many commands as possible together.
 	 */
-	for (; cmds->writecnt || cmds->readcnt; cmds++) {
+	for (; !spi_is_empty(cmds); cmds++) {
+		const size_t writecnt = spi_write_len(cmds);
+		const size_t readcnt = spi_read_len(cmds);
 
-		if (cmds->writecnt > 65536 || cmds->readcnt > 65536)
+		if (writecnt > 65536 || readcnt > 65536)
 			return SPI_INVALID_LENGTH;
 
 		if (!ft2232_spi_command_fits(cmds, FTDI_HW_BUFFER_SIZE - i)) {
@@ -241,19 +244,19 @@
 		buf[i++] = spi_data->pindir;
 
 		/* WREN, OP(PROGRAM, ERASE), ADDR, DATA */
-		if (cmds->writecnt) {
+		if (writecnt) {
 			buf[i++] = MPSSE_DO_WRITE | MPSSE_WRITE_NEG;
-			buf[i++] = (cmds->writecnt - 1) & 0xff;
-			buf[i++] = ((cmds->writecnt - 1) >> 8) & 0xff;
-			memcpy(buf + i, cmds->writearr, cmds->writecnt);
-			i += cmds->writecnt;
+			buf[i++] = (writecnt - 1) & 0xff;
+			buf[i++] = ((writecnt - 1) >> 8) & 0xff;
+			memcpy(buf + i, cmds->writearr, writecnt);
+			i += writecnt;
 		}
 
 		/* An optional read command */
-		if (cmds->readcnt) {
+		if (readcnt) {
 			buf[i++] = MPSSE_DO_READ;
-			buf[i++] = (cmds->readcnt - 1) & 0xff;
-			buf[i++] = ((cmds->readcnt - 1) >> 8) & 0xff;
+			buf[i++] = (readcnt - 1) & 0xff;
+			buf[i++] = ((readcnt - 1) >> 8) & 0xff;
 		}
 
 		/* Add final de-assert CS# */
@@ -263,8 +266,7 @@
 		buf[i++] = spi_data->pindir;
 
 		/* continue if there is no read-cmd and further cmds exist */
-		if (!cmds->readcnt &&
-				((cmds + 1)->writecnt || (cmds + 1)->readcnt) &&
+		if (!readcnt && !spi_is_empty(cmds + 1) &&
 				ft2232_spi_command_fits((cmds + 1), FTDI_HW_BUFFER_SIZE - i)) {
 			continue;
 		}
@@ -276,8 +278,8 @@
 			break;
 		}
 
-		if (cmds->readcnt) {
-			ret = get_buf(ftdic, cmds->readarr, cmds->readcnt);
+		if (readcnt) {
+			ret = get_buf(ftdic, cmds->readarr, readcnt);
 			if (ret) {
 				msg_perr("get_buf failed: %i\n", ret);
 				break;
diff --git a/ichspi.c b/ichspi.c
index 0fc96ba..65c357d 100644
--- a/ichspi.c
+++ b/ichspi.c
@@ -25,6 +25,7 @@
 #include "flash.h"
 #include "programmer.h"
 #include "hwaccess_physmap.h"
+#include "spi_command.h"
 #include "spi.h"
 #include "ich_descriptors.h"
 
@@ -1520,8 +1521,8 @@
 	int ret = 0;
 	int i;
 	int oppos, preoppos;
-	for (; (cmds->writecnt || cmds->readcnt) && !ret; cmds++) {
-		if ((cmds + 1)->writecnt || (cmds + 1)->readcnt) {
+	for (; !spi_is_empty(cmds) && !ret; cmds++) {
+		if (!spi_is_empty(cmds + 1)) {
 			/* Next command is valid. */
 			preoppos = find_preop(curopcodes, cmds->writearr[0]);
 			oppos = find_opcode(curopcodes, (cmds + 1)->writearr[0]);
@@ -1546,7 +1547,8 @@
 				 * No need to bother with fixups.
 				 */
 				if (!ichspi_lock) {
-					oppos = reprogram_opcode_on_the_fly((cmds + 1)->writearr[0], (cmds + 1)->writecnt, (cmds + 1)->readcnt);
+					oppos = reprogram_opcode_on_the_fly((cmds + 1)->writearr[0],
+							spi_write_len(cmds + 1), spi_read_len(cmds + 1));
 					if (oppos == -1)
 						continue;
 					curopcodes->opcode[oppos].atomic = preoppos + 1;
@@ -1565,7 +1567,7 @@
 			 * preoppos matched, this is a normal opcode.
 			 */
 		}
-		ret = ich_spi_send_command(flash, cmds->writecnt, cmds->readcnt,
+		ret = ich_spi_send_command(flash, spi_write_len(cmds), spi_read_len(cmds),
 					   cmds->writearr, cmds->readarr);
 		/* Reset the type of all opcodes to non-atomic. */
 		for (i = 0; i < 8; i++)
diff --git a/include/flash.h b/include/flash.h
index 3d899c7..850d607 100644
--- a/include/flash.h
+++ b/include/flash.h
@@ -517,16 +517,5 @@
 #define msg_cspew(...)	print(FLASHPROG_MSG_SPEW, __VA_ARGS__)	/* chip debug spew  */
 void flashprog_progress_add(struct flashprog_flashctx *, size_t progress);
 
-/* spi.c */
-struct spi_command {
-	unsigned int writecnt;
-	unsigned int readcnt;
-	const unsigned char *writearr;
-	unsigned char *readarr;
-};
-#define NULL_SPI_CMD { 0, 0, NULL, NULL, }
-int spi_send_command(const struct flashctx *flash, unsigned int writecnt, unsigned int readcnt, const unsigned char *writearr, unsigned char *readarr);
-int spi_send_multicommand(const struct flashctx *flash, struct spi_command *cmds);
-
 enum chipbustype get_buses_supported(void);
 #endif				/* !__FLASH_H__ */
diff --git a/include/programmer.h b/include/programmer.h
index edef52b..fb19c00 100644
--- a/include/programmer.h
+++ b/include/programmer.h
@@ -299,7 +299,23 @@
 #define SPI_MASTER_4BA			(1U << 0)  /**< Can handle 4-byte addresses */
 #define SPI_MASTER_NO_4BA_MODES		(1U << 1)  /**< Compatibility modes (i.e. extended address
 						        register, 4BA mode switch) don't work */
+#define SPI_MASTER_DUAL_IN		(1U << 2)  /**< Can read two bits at once (bidirectional
+							MOSI and MISO) */
+#define SPI_MASTER_DUAL_IO		(1U << 3)  /**< Can transfer two bits at once (bidirectional
+							MOSI and MISO) */
+#define SPI_MASTER_QUAD_IN		(1U << 4)  /**< Can read four bits at once (bidirectional
+						        MOSI and MISO + IO2 + IO3) */
+#define SPI_MASTER_QUAD_IO		(1U << 5)  /**< Can transfer four bits at once (bidirectional
+						        MOSI and MISO + IO2 + IO3) */
+#define SPI_MASTER_QPI			(1U << 6)  /**< Can send commands with quad i/o */
+#define SPI_MASTER_DTR_IN		(1U << 7)  /**< Double Transfer Rate: Can read two bits
+							per clock cycle per line */
 
+/* Shorthands: */
+#define SPI_MASTER_DUAL			(SPI_MASTER_DUAL_IN | SPI_MASTER_DUAL_IO)
+#define SPI_MASTER_QUAD			(SPI_MASTER_QUAD_IN | SPI_MASTER_QUAD_IO)
+
+struct spi_command;
 struct spi_master {
 	uint32_t features;
 	unsigned int max_data_read; // (Ideally,) maximum data read size in one go (excluding opcode+address).
diff --git a/include/spi_command.h b/include/spi_command.h
new file mode 100644
index 0000000..54dfe48
--- /dev/null
+++ b/include/spi_command.h
@@ -0,0 +1,101 @@
+/*
+ * This file is part of the flashrom project.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __SPI_COMMAND_H__
+#define __SPI_COMMAND_H__ 1
+
+#include <stdlib.h>
+#include <stdbool.h>
+
+/*
+ * Modern SPI flashes support dual and quad i/o modes. However, there are
+ * subtle differences in which parts of a transaction are transferred
+ * in which mode. The transaction is generally divided into three phases:
+ *   * opcode
+ *   * address
+ *   * data
+ *
+ * For each phase, the number of concurrently transferred bits is specified,
+ * hence we get a triple like
+ *   * 1-1-1
+ * which tells us that all three phases are transferred in single i/o
+ * mode. Or, for instance,
+ *   * 1-4-4
+ * which tells us the opcode is transferred in single i/o mode, but
+ * the address and data are transferred in quad i/o mode.
+ *
+ * There are a few common combinations, often chips support all of them:
+ *   * 1-1-1 single i/o
+ *   * 1-1-2 dual output (for reads, only the flash outputs two bits at once)
+ *   * 1-2-2 dual i/o (both controller and flash can transfer two bits at once)
+ *   * 1-1-4 quad output (for reads, only the flash outputs four bits at once)
+ *   * 1-4-4 quad i/o (both controller and flash can transfer four bits at once)
+ *   * 4-4-4 QPI
+ * In all modes that transfer the opcode in single i/o, the opcode tells the
+ * flash what to expect, i.e. how further bytes will be transferred. This
+ * achieves backwards compatibility with simple SPI controllers. The QPI
+ * mode, OTOH, is not backwards compatible and usually needs to be entered
+ * first with a special opcode. In QPI mode, only fast-read instructions
+ * (w/ dummy cycles) are supported; the number of dummy cycles is often
+ * configurable.
+ *
+ * For dual i/o, MOSI and MISO lines are bidirectional. So this can work
+ * without any special setup, if both controller and flash are compatible.
+ *
+ * For quad i/o, usually the flash's /HOLD and /WP pins are re-purposed, and
+ * the controller needs additional pins. The pin muxes inside the flash are
+ * usually controlled by a quad-enable (QE) bit in the status register. This
+ * is *not* to be confused with entering QPI mode. Quad-enable merely says
+ * that the pins are available for data transfer.
+ */
+enum io_mode {
+	SINGLE_IO_1_1_1,	/* single i/o in all phases; first enumerator (0), so also the mode of commands that don't set .io_mode */
+	DUAL_OUT_1_1_2,		/* dual output: only the data phase transfers two bits at once */
+	DUAL_IO_1_2_2,		/* dual i/o: address and data phases transfer two bits at once */
+	QUAD_OUT_1_1_4,		/* quad output: only the data phase transfers four bits at once */
+	QUAD_IO_1_4_4,		/* quad i/o: address and data phases transfer four bits at once */
+	QPI_4_4_4,		/* QPI: opcode, address and data all transfer four bits at once */
+};
+
+struct spi_command {
+	enum io_mode io_mode;	/* bits transferred at once per phase (opcode-address-data) */
+	size_t opcode_len;	/* bytes to write in opcode i/o phase */
+	size_t address_len;	/* bytes to write in address i/o phase */
+	size_t write_len;	/* bytes to write in data i/o phase */
+	size_t high_z_len;	/* dummy bytes to skip in data i/o phase */
+	size_t read_len;	/* bytes to read in data i/o phase */
+	const unsigned char *writearr;	/* bytes to send; callers consume spi_write_len() bytes from it */
+	unsigned char *readarr;		/* receive buffer; callers pass spi_read_len() as its length */
+};
+#define NULL_SPI_CMD { 0, 0, 0, 0, 0, 0, NULL, NULL, }	/* all lengths zero, i.e. spi_is_empty(): ends a command array */
+
+static inline size_t spi_write_len(const struct spi_command *const cmd)
+{
+	return cmd->opcode_len + cmd->address_len + cmd->write_len; /* total bytes to write: opcode + address + data */
+}
+
+static inline size_t spi_read_len(const struct spi_command *const cmd)
+{
+	return cmd->high_z_len + cmd->read_len; /* dummy (high-z) bytes are counted together with the bytes to read */
+}
+
+static inline bool spi_is_empty(const struct spi_command *const cmd)
+{
+	return !spi_write_len(cmd) && !spi_read_len(cmd); /* true when there is nothing to write and nothing to read */
+}
+
+int spi_send_command(const struct flashctx *, unsigned int writecnt, unsigned int readcnt, const unsigned char *writearr, unsigned char *readarr);
+int spi_send_multicommand(const struct flashctx *, struct spi_command *cmds);
+
+#endif				/* !__SPI_COMMAND_H__ */
diff --git a/sfdp.c b/sfdp.c
index e33f7fe..ca64b74 100644
--- a/sfdp.c
+++ b/sfdp.c
@@ -17,6 +17,7 @@
 #include <stdlib.h>
 #include <string.h>
 #include "flash.h"
+#include "spi_command.h"
 #include "spi.h"
 #include "chipdrivers.h"
 
diff --git a/spi.c b/spi.c
index ac51d87..48e86ec 100644
--- a/spi.c
+++ b/spi.c
@@ -24,6 +24,7 @@
 #include "flashchips.h"
 #include "chipdrivers.h"
 #include "programmer.h"
+#include "spi_command.h"
 #include "spi.h"
 
 int spi_send_command(const struct flashctx *flash, unsigned int writecnt,
@@ -46,16 +47,15 @@
 {
 	struct spi_command cmd[] = {
 	{
-		.writecnt = writecnt,
-		.readcnt = readcnt,
+		.io_mode = SINGLE_IO_1_1_1,
+		.opcode_len = 1,
+		.address_len = writecnt - 1,
+		.read_len = readcnt,
 		.writearr = writearr,
 		.readarr = readarr,
-	}, {
-		.writecnt = 0,
-		.writearr = NULL,
-		.readcnt = 0,
-		.readarr = NULL,
-	}};
+	},
+		NULL_SPI_CMD
+	};
 
 	return spi_send_multicommand(flash, cmd);
 }
@@ -64,9 +64,12 @@
 				  struct spi_command *cmds)
 {
 	int result = 0;
-	for (; (cmds->writecnt || cmds->readcnt) && !result; cmds++) {
-		result = spi_send_command(flash, cmds->writecnt, cmds->readcnt,
-					  cmds->writearr, cmds->readarr);
+	for (; !spi_is_empty(cmds) && !result; cmds++) {
+		if (cmds->io_mode != SINGLE_IO_1_1_1)
+			return SPI_FLASHPROG_BUG;
+		result = spi_send_command(flash,
+				spi_write_len(cmds), spi_read_len(cmds),
+				cmds->writearr, cmds->readarr);
 	}
 	return result;
 }
diff --git a/spi25.c b/spi25.c
index f8fdf8e..82fe4a8 100644
--- a/spi25.c
+++ b/spi25.c
@@ -25,6 +25,7 @@
 #include "flashchips.h"
 #include "chipdrivers.h"
 #include "programmer.h"
+#include "spi_command.h"
 #include "spi.h"
 
 static int spi_rdid(struct flashctx *flash, unsigned char *readarr, int bytes)
@@ -312,11 +313,11 @@
 	struct spi_command cmds[] = {
 	{
 		.readarr = 0,
-		.writecnt = 1,
+		.opcode_len = 1,
 		.writearr = (const unsigned char[]){ JEDEC_WREN },
 	}, {
 		.readarr = 0,
-		.writecnt = 1,
+		.opcode_len = 1,
 		.writearr = (const unsigned char[]){ op },
 	},
 		NULL_SPI_CMD,
@@ -346,11 +347,12 @@
 	struct spi_command cmds[] = {
 	{
 		.readarr = 0,
-		.writecnt = 1,
+		.opcode_len = 1,
 		.writearr = (const unsigned char[]){ JEDEC_WREN },
 	}, {
 		.readarr = 0,
-		.writecnt = 2,
+		.opcode_len = 1,
+		.write_len = 1,
 		.writearr = (const unsigned char[]){ op, regdata },
 	},
 		NULL_SPI_CMD,
@@ -423,7 +425,7 @@
 	struct spi_command cmds[] = {
 	{
 		.readarr = 0,
-		.writecnt = 1,
+		.opcode_len = 1,
 		.writearr = (const unsigned char[]){ JEDEC_WREN },
 	}, {
 		.readarr = 0,
@@ -445,7 +447,9 @@
 		return 1;
 
 	memcpy(cmd + 1 + addr_len, out_bytes, out_len);
-	cmds[1].writecnt = 1 + addr_len + out_len;
+	cmds[1].opcode_len  = 1;
+	cmds[1].address_len = addr_len;
+	cmds[1].write_len   = out_len;
 
 	const int result = spi_send_multicommand(flash, cmds);
 	if (result)
diff --git a/spi25_statusreg.c b/spi25_statusreg.c
index 508ee5d..b363b5f 100644
--- a/spi25_statusreg.c
+++ b/spi25_statusreg.c
@@ -19,6 +19,7 @@
 
 #include "flash.h"
 #include "chipdrivers.h"
+#include "spi_command.h"
 #include "spi.h"
 
 /* === Generic functions === */
@@ -138,21 +139,15 @@
 
 	struct spi_command cmds[] = {
 	{
-		.writecnt	= JEDEC_WREN_OUTSIZE,
+		.opcode_len	= JEDEC_WREN_OUTSIZE,
 		.writearr	= &enable_cmd,
-		.readcnt	= 0,
-		.readarr	= NULL,
 	}, {
-		.writecnt	= write_cmd_len,
+		.opcode_len	= 1,
+		.write_len	= write_cmd_len - 1,
 		.writearr	= write_cmd,
-		.readcnt	= 0,
-		.readarr	= NULL,
-	}, {
-		.writecnt	= 0,
-		.writearr	= NULL,
-		.readcnt	= 0,
-		.readarr	= NULL,
-	}};
+	},
+		NULL_SPI_CMD
+	};
 
 	int result = spi_send_multicommand(flash, cmds);
 	if (result) {
diff --git a/spi95.c b/spi95.c
index cbe347b..ee576e2 100644
--- a/spi95.c
+++ b/spi95.c
@@ -21,6 +21,7 @@
 #include <stdlib.h>
 #include "flashchips.h"
 #include "chipdrivers.h"
+#include "spi_command.h"
 #include "spi.h"
 
 /* For ST95XXX chips which have RDID */