diff --git a/flash.h b/flash.h
index 5cb7040..ad7c91b 100644
--- a/flash.h
+++ b/flash.h
@@ -122,6 +122,8 @@
 #define FEATURE_4BA_SUPPORT	(1 << 10)
 #define FEATURE_4BA_EXT_ADDR	(1 << 11) /**< Regular 3-byte operations can be used by writing the most
 					       significant address byte into an extended address register. */
+#define FEATURE_4BA_READ	(1 << 12) /**< Native 4BA read instruction (0x13) is supported. */
+#define FEATURE_4BA_WRITE	(1 << 13) /**< Native 4BA byte program (0x12) is supported. */
 
 enum test_state {
 	OK = 0,
diff --git a/flashchips.c b/flashchips.c
index 77fcb90..4bf71c5 100644
--- a/flashchips.c
+++ b/flashchips.c
@@ -9928,7 +9928,7 @@
 		.page_size	= 256,
 		/* supports SFDP */
 		/* OTP: 64B total; read 0x4B, write 0x42 */
-		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT,
+		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT | FEATURE_4BA_READ | FEATURE_4BA_WRITE,
 		.four_bytes_addr_funcs =
 		{
 			.read_nbyte = spi_nbyte_read_4ba_direct,
@@ -9967,7 +9967,7 @@
 		.page_size	= 256,
 		/* supports SFDP */
 		/* OTP: 64B total; read 0x4B, write 0x42 */
-		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT,
+		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT | FEATURE_4BA_READ | FEATURE_4BA_WRITE,
 		.four_bytes_addr_funcs =
 		{
 			.read_nbyte = spi_nbyte_read_4ba_direct,
@@ -14851,7 +14851,7 @@
 		/* supports SFDP */
 		/* OTP: 1024B total, 256B reserved; read 0x48; write 0x42, erase 0x44, read ID 0x4B */
 		/* FOUR_BYTE_ADDR: supports 4-bytes addressing mode */
-		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT,
+		.feature_bits	= FEATURE_WRSR_WREN | FEATURE_OTP | FEATURE_4BA_SUPPORT | FEATURE_4BA_READ,
 		.four_bytes_addr_funcs =
 		{
 			.set_4ba = spi_enter_4ba_b7_we, /* enter 4-bytes addressing mode by CMD B7 + WREN */
diff --git a/spi25.c b/spi25.c
index 6940394..ce3d184 100644
--- a/spi25.c
+++ b/spi25.c
@@ -24,6 +24,7 @@
 
 #include <stddef.h>
 #include <string.h>
+#include <stdbool.h>
 #include "flash.h"
 #include "flashchips.h"
 #include "chipdrivers.h"
@@ -372,10 +373,10 @@
 	return 0;
 }
 
-static int spi_prepare_address(struct flashctx *const flash,
-			       uint8_t cmd_buf[], const unsigned int addr)
+static int spi_prepare_address(struct flashctx *const flash, uint8_t cmd_buf[],
+			       const bool native_4ba, const unsigned int addr)
 {
-	if (flash->in_4ba_mode) {
+	if (native_4ba || flash->in_4ba_mode) {
 		cmd_buf[1] = (addr >> 24) & 0xff;
 		cmd_buf[2] = (addr >> 16) & 0xff;
 		cmd_buf[3] = (addr >>  8) & 0xff;
@@ -402,6 +403,7 @@
  *
  * @param flash       the flash chip's context
  * @param op          the operation to execute
+ * @param native_4ba  whether `op` always takes a 4-byte address
  * @param addr        the address parameter to `op`
  * @param out_bytes   bytes to send after the address,
- *                    may be NULL if and only if `out_bytes` is 0
+ *                    may be NULL if and only if `out_len` is 0
@@ -409,8 +411,8 @@
  * @param poll_delay  interval in us for polling WIP
  * @return 0 on success, non-zero otherwise
  */
-static int spi_write_cmd(struct flashctx *const flash,
-			 const uint8_t op, const unsigned int addr,
+static int spi_write_cmd(struct flashctx *const flash, const uint8_t op,
+			 const bool native_4ba, const unsigned int addr,
 			 const uint8_t *const out_bytes, const size_t out_len,
 			 const unsigned int poll_delay)
 {
@@ -426,7 +428,7 @@
 	};
 
 	cmd[0] = op;
-	const int addr_len = spi_prepare_address(flash, cmd, addr);
+	const int addr_len = spi_prepare_address(flash, cmd, native_4ba, addr);
 	if (addr_len < 0)
 		return 1;
 
@@ -469,7 +471,7 @@
 		       unsigned int blocklen)
 {
 	/* This usually takes 100-4000ms, so wait in 100ms steps. */
-	return spi_write_cmd(flash, 0x52, addr, NULL, 0, 100 * 1000);
+	return spi_write_cmd(flash, 0x52, false, addr, NULL, 0, 100 * 1000);
 }
 
 /* Block size is usually
@@ -478,7 +480,7 @@
 int spi_block_erase_c4(struct flashctx *flash, unsigned int addr, unsigned int blocklen)
 {
 	/* This usually takes 240-480s, so wait in 500ms steps. */
-	return spi_write_cmd(flash, 0xc4, addr, NULL, 0, 500 * 1000);
+	return spi_write_cmd(flash, 0xc4, false, addr, NULL, 0, 500 * 1000);
 }
 
 /* Block size is usually
@@ -490,7 +492,7 @@
 		       unsigned int blocklen)
 {
 	/* This usually takes 100-4000ms, so wait in 100ms steps. */
-	return spi_write_cmd(flash, 0xd8, addr, NULL, 0, 100 * 1000);
+	return spi_write_cmd(flash, 0xd8, false, addr, NULL, 0, 100 * 1000);
 }
 
 /* Block size is usually
@@ -500,7 +502,7 @@
 		       unsigned int blocklen)
 {
 	/* This usually takes 100-4000ms, so wait in 100ms steps. */
-	return spi_write_cmd(flash, 0xd7, addr, NULL, 0, 100 * 1000);
+	return spi_write_cmd(flash, 0xd7, false, addr, NULL, 0, 100 * 1000);
 }
 
 /* Page erase (usually 256B blocks) */
@@ -508,7 +510,7 @@
 {
 	/* This takes up to 20ms usually (on worn out devices
 	   up to the 0.5s range), so wait in 1ms steps. */
-	return spi_write_cmd(flash, 0xdb, addr, NULL, 0, 1 * 1000);
+	return spi_write_cmd(flash, 0xdb, false, addr, NULL, 0, 1 * 1000);
 }
 
 /* Sector size is usually 4k, though Macronix eliteflash has 64k */
@@ -516,19 +518,19 @@
 		       unsigned int blocklen)
 {
 	/* This usually takes 15-800ms, so wait in 10ms steps. */
-	return spi_write_cmd(flash, 0x20, addr, NULL, 0, 10 * 1000);
+	return spi_write_cmd(flash, 0x20, false, addr, NULL, 0, 10 * 1000);
 }
 
 int spi_block_erase_50(struct flashctx *flash, unsigned int addr, unsigned int blocklen)
 {
 	/* This usually takes 10ms, so wait in 1ms steps. */
-	return spi_write_cmd(flash, 0x50, addr, NULL, 0, 1 * 1000);
+	return spi_write_cmd(flash, 0x50, false, addr, NULL, 0, 1 * 1000);
 }
 
 int spi_block_erase_81(struct flashctx *flash, unsigned int addr, unsigned int blocklen)
 {
 	/* This usually takes 8ms, so wait in 1ms steps. */
-	return spi_write_cmd(flash, 0x81, addr, NULL, 0, 1 * 1000);
+	return spi_write_cmd(flash, 0x81, false, addr, NULL, 0, 1 * 1000);
 }
 
 int spi_block_erase_60(struct flashctx *flash, unsigned int addr,
@@ -601,15 +603,18 @@
 
 static int spi_nbyte_program(struct flashctx *flash, unsigned int addr, const uint8_t *bytes, unsigned int len)
 {
-	return spi_write_cmd(flash, JEDEC_BYTE_PROGRAM, addr, bytes, len, 10);
+	const bool native_4ba = !!(flash->chip->feature_bits & FEATURE_4BA_WRITE);
+	const uint8_t op = native_4ba ? JEDEC_BYTE_PROGRAM_4BA : JEDEC_BYTE_PROGRAM;
+	return spi_write_cmd(flash, op, native_4ba, addr, bytes, len, 10);
 }
 
 int spi_nbyte_read(struct flashctx *flash, unsigned int address, uint8_t *bytes,
 		   unsigned int len)
 {
-	uint8_t cmd[1 + JEDEC_MAX_ADDR_LEN] = { JEDEC_READ, };
+	const bool native_4ba = !!(flash->chip->feature_bits & FEATURE_4BA_READ);
+	uint8_t cmd[1 + JEDEC_MAX_ADDR_LEN] = { native_4ba ? JEDEC_READ_4BA : JEDEC_READ, };
 
-	const int addr_len = spi_prepare_address(flash, cmd, address);
+	const int addr_len = spi_prepare_address(flash, cmd, native_4ba, address);
 	if (addr_len < 0)
 		return 1;
 
@@ -781,7 +786,7 @@
 		//return SPI_GENERIC_ERROR;
 	}
 
-	result = spi_write_cmd(flash, JEDEC_AAI_WORD_PROGRAM, start, buf + pos - start, 2, 10);
+	result = spi_write_cmd(flash, JEDEC_AAI_WORD_PROGRAM, false, start, buf + pos - start, 2, 10);
 	if (result)
 		goto bailout;
 
