Busy being born.

Understanding EVM smart contract memory via Yul

EVM smart contract memory is a byte array, addressable at the byte level -- (0x00 8 bits)-(0x01 8 bits)-(0x....

EVM smart contract storage is addressable in units of 32 bytes. The storage slot at address 0x00 stores 32 bytes of data, and the slot at address 0x01 (address incremented by 1) stores 32 bytes of data, and so on.

Unlike storage, EVM smart contract memory is addressable in units of 1 byte. The memory slot at address 0x00 stores 1 byte of data, and the slot at address 0x01 (address incremented by 1) stores 1 byte of data, and so on.

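There are 4 op-codes dedicated to working with memory:

- mload(p): reads the 32 bytes stored at addresses p through p + 31.
- mstore(p, v): writes the 32-byte value v to addresses p through p + 31.
- mstore8(p, v): writes a single byte (the lowest byte of v) to address p.
- msize(): returns the size, in bytes, of the memory accessed so far in the current execution context.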

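Each new smart contract function call gets a fresh execution context with a 'clean slate' memory. Before the execution of a function call, Solidity runs a preamble which sets up memory as follows:

- 0x00 - 0x3f (64 bytes): scratch space, usable for short-lived data such as hashing inputs.
- 0x40 - 0x5f (32 bytes): the free memory pointer, initialised to 0x80.
- 0x60 - 0x7f (32 bytes): the zero slot, which is never written to and is used as the initial value of empty dynamic memory arrays.
- 0x80 onwards: free memory, where Solidity allocates new data.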

Free memory pointer vs. msize

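The free memory pointer (FMP) is the value stored at memory location 0x40. It holds the address of the first unallocated byte of memory, and Solidity increments it whenever it allocates new data in memory.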

/// @notice Shows the free memory pointer (the value stored at 0x40) advancing
/// when Solidity allocates a struct in memory.
contract MemoryExample {
	struct Point {
		uint256 x;
		uint256 y;
	}
	
	/// @dev Carries the value read from memory location 0x40.
	event MemoryPointerValue(bytes32);
	
	/// @notice Emits the free memory pointer before and after allocating a
	/// two-word `Point` struct in memory.
	function inspect() external {
		bytes32 pointerVal;
		assembly {
			pointerVal := mload(0x40)
		}
		// Emits: 0x80
		emit MemoryPointerValue(pointerVal);
		
		// Store 0x40 (64) bytes in memory. A struct value is built with the
		// `Point({...})` constructor syntax; a bare `{x: .., y: ..}` literal
		// is not valid Solidity.
		Point memory p = Point({x: 20, y: 40});
		
		assembly {
			pointerVal := mload(0x40)
		}
		// Emits: add(0x80, 0x40) = 0xc0
		emit MemoryPointerValue(pointerVal);
	}
}

In most cases, the free memory pointer (FMP) and value of msize move in tandem.

The highest accessed address in memory and the next free memory location are generally the same, since new data is stored in memory contiguously, right after the most recently stored data.

However, the difference in the behaviour of FMP and msize is that the FMP is simply a value stored at memory location 0x40 which we can directly manipulate in inline assembly.

This happens outside the knowledge of Solidity (whenever we reference "Solidity" as "Solidity does this or that" what we really mean is "Solidity compiler") and therefore, it is possible to write buggy, unsafe code by not being careful about memory in inline assembly.

/// @notice Demonstrates how overwriting the free memory pointer in inline
/// assembly makes Solidity allocate new data on top of existing data.
contract MemoryExample {
	
	// foo is [x];
	//
	// Since foo lives in memory, Solidity increments the free memory pointer
	// value stored at 0x40 by 32 bytes (so, from the 0x80 initial value to
	// 0xa0) to account for it.
	/// @param foo One-element array; its value ends up being ignored.
	/// @return The first element of `foo` as read back from memory.
	function breakMemory(uint256[1] memory foo) 
		external
		view
		returns (uint256) 
	{
		// We "accidentally" overwrite the free memory pointer, resetting it
		// to 0x80 -- the very location where foo is stored.
		assembly {
			mstore(0x40, 0x80)
		}
		
		// Now Solidity stores `6` at (0x80, 0xa0), because that is where the
		// free memory pointer says free memory starts. At the same time, for
		// Solidity, foo is stored at (0x80, 0xa0) too --> the location we've
		// just overwritten.
		uint256[1] memory bar = [uint256(6)];
		
		// So, this always returns `6` regardless of the function arg passed.
		return foo[0];
	}
}

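The behaviour of msize, on the other hand, is strict. It returns the size of active memory in bytes at the moment the op code is invoked: the highest byte offset read from or written to so far, plus one, rounded up to a multiple of 32 (memory always grows in 32-byte words).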

/// @notice Contrasts the free memory pointer (a value Solidity keeps at 0x40)
/// with `msize` (the EVM's own record of how far memory has been touched).
contract MemoryExample {
	struct Point {
		uint256 x;
		uint256 y;
	}
	
	/// @dev First value: free memory pointer. Second value: msize.
	event MemoryPointerAndMsize(bytes32, bytes32);
	
	/// @notice Emits the free memory pointer and msize at three points:
	/// at function entry, after a struct allocation, and after a raw read
	/// from a high memory address.
	function inspect() external {
		bytes32 pointerVal;
		bytes32 mSizeVal;
		assembly {
			pointerVal := mload(0x40)
			mSizeVal := msize()
		}
		// Emits: 0x80, 0x60
		//
		// Solidity has written the free memory pointer val
		// in (0x40, 0x60) therefore msize is 0x60 since that's
		// the largest memory address up till which data has been
		// read or written.
		emit MemoryPointerAndMsize(pointerVal, mSizeVal);
		
		
		// Store 0x40 (64) bytes in memory. A struct value is built with the
		// `Point({...})` constructor syntax; a bare `{x: .., y: ..}` literal
		// is not valid Solidity.
		Point memory p = Point({x: 20, y: 40});
		
		assembly {
			pointerVal := mload(0x40)
			mSizeVal := msize()
		}
		// Emits: 0xc0, 0xc0
		emit MemoryPointerAndMsize(pointerVal, mSizeVal);
		
		assembly {
			pop(mload(0xff))
			pointerVal := mload(0x40)
			mSizeVal := msize()
		}
		// Emits: 0xc0, 0x120
		//
		// Free memory pointer has not changed. However, in assembly (so,
		// bypassing Solidity itself) we read 32 bytes starting at memory
		// address 0xff, i.e. bytes 0xff through 0x11e. Memory grows in
		// 32-byte words, so msize is rounded up to the next multiple of
		// 0x20 past the last touched byte: 0x120.
		emit MemoryPointerAndMsize(pointerVal, mSizeVal);
		
	}
}

The gas costs for memory access in a transaction are calculated based on the ultimate value of msize, and therefore it is key to not accidentally or unnecessarily access high memory addresses.

Structs and Arrays

We can store structs and arrays in memory.

Storing structs in memory works just like in the case of storage -- structs are a template, not a data type. A struct in memory is just its members stored contiguously in memory.

/// @notice Shows that a struct in memory occupies one 32-byte word per member,
/// laid out contiguously.
contract MemoryExample {
	struct Point {
		uint256 x;
		uint256 y;
	}
	
	/// @dev Carries the value read from memory location 0x40.
	event MemoryPointerValue(bytes32);
	
	/// @notice Emits the free memory pointer before and after a `Point`
	/// (two uint256 members) is placed in memory.
	function inspect() external {
		bytes32 pointerVal;
		assembly {
			pointerVal := mload(0x40)
		}
		// Emits: 0x80
		emit MemoryPointerValue(pointerVal);
		
		// Store 0x40 (64) bytes in memory: x at 0x80, y at 0xa0.
		// Structs must be constructed via `Point({...})`; assigning a bare
		// `{x: .., y: ..}` literal does not compile.
		Point memory p = Point({x: 20, y: 40});
		
		assembly {
			pointerVal := mload(0x40)
		}
		// Emits: add(0x80, 0x40) = 0xc0
		emit MemoryPointerValue(pointerVal);
	}
}

Fixed-size arrays of fixed-length types are stored in memory simply as their elements, laid out contiguously.

/// @notice Shows how much memory a fixed-size array of two uint256 occupies.
contract MemoryExample {
	event MemoryPointerValue(bytes32);
	
	/// @notice Emits the value held at 0x40 before and after a
	/// `uint256[2]` is allocated in memory.
	function inspect() external {
		// Snapshot of the free memory pointer prior to any allocation.
		bytes32 fmpBefore;
		assembly {
			fmpBefore := mload(0x40)
		}
		emit MemoryPointerValue(fmpBefore); // 0x80
		
		// Two 32-byte elements, no length word: 0x40 (64) bytes in total.
		uint256[2] memory pair = [uint256(1), uint256(2)];
		
		// Snapshot of the free memory pointer once the array exists.
		bytes32 fmpAfter;
		assembly {
			fmpAfter := mload(0x40)
		}
		emit MemoryPointerValue(fmpAfter); // 0x80 + 0x40 = 0xc0
	}
}

Dynamic arrays are stored in memory with the first 32 bytes storing the array's length and then the elements contiguously.

/// @notice Shows the memory layout of a dynamic array: a 32-byte length word
/// followed by the elements.
contract MemoryExample {
	/// @dev Fields, in order: array location, array length, the word right
	/// after the length, and the word after that.
	event Debug(bytes32,bytes32,bytes32,bytes32);
	
	/// @notice Emits where `x` lives in memory, its length, and the two
	/// 32-byte words that follow the length.
	/// @param x Dynamic array copied into memory.
	function args(bytes32[] memory x) external {
		bytes32 location;
		bytes32 len;
		bytes32 value0;
		bytes32 value1;
		
		assembly {
			// In assembly, `x` is the memory address of the array.
			location := x
			// The first word at that address is the length.
			len := mload(x)
			// Elements follow the length word, 32 bytes apart. These two
			// reads are unconditional: if `x` has fewer than two elements,
			// they return whatever happens to sit in memory after the array.
			value0 := mload(add(x, 0x20))
			value1 := mload(add(x, 0x40))
		}
		emit Debug(location, len, value0, value1);
	}
}

In Yul, a Solidity variable that is declared in memory evaluates to the memory address where its data begins; to get the actual contents, you read from that address via mload.

Because all objects in memory are laid out end to end, there's no "push" for arrays.

The Solidity compiler does not try to pack values less than 32 bytes when loaded in memory (unlike storage).

/// @notice Shows that values narrower than 32 bytes are not packed once they
/// are copied from storage into memory.
contract MemoryExample {
	
	// In storage, the five uint8 elements are packed together into a
	// single 32-byte slot.
	uint8[] foo;
	
	constructor() {
		foo = [1,2,3,4,5];
	}
	
	/// @notice Copies `foo` from storage into memory.
	function unpack() external view {
		// In memory every element gets its own 32-byte word, so the copy
		// takes 32 bytes for the length plus 32 bytes per element.
		uint8[] memory inMemory = foo;
	}
}

ABI Encode and EncodePacked

The bytes output of the abi.encode and abi.encodePacked operations is stored in memory. The abi.encode bytes output is such that the first 32 bytes store the length of the ensuing data, and the ensuing bytes store the data -- all without packing variables.

/// @notice Shows how much memory the output of `abi.encode` occupies.
contract MemoryExample {
	struct Point {
		uint256 x;
		uint256 y;
	}
	
	event MemoryPointerValue(bytes32);
	
	/// @notice Emits the value held at 0x40 before and after encoding a
	/// uint256 and a uint128 with `abi.encode`.
	function inspect() external {
		// Free memory pointer before the encoding is produced.
		bytes32 fmpBefore;
		assembly {
			fmpBefore := mload(0x40)
		}
		emit MemoryPointerValue(fmpBefore); // 0x80
		
		// 0x60 (96) bytes land in memory: a length word, then each argument
		// zero-padded to a full 32-byte word.
		bytes memory encoded = abi.encode(uint256(1), uint128(2));
		
		// Free memory pointer after the encoding is produced.
		bytes32 fmpAfter;
		assembly {
			fmpAfter := mload(0x40)
		}
		emit MemoryPointerValue(fmpAfter); // 0x80 + 0x60 = 0xe0
	}
}

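The abi.encodePacked bytes output packs data in as small a length as possible: each value takes only as many bytes as its type needs, with no padding in between. The packed encoding itself carries no length field; however, because the result is held in a bytes memory variable, it is stored in memory behind a 32 byte length prefix, like in the example.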

/// @notice Shows how much memory the output of `abi.encodePacked` occupies.
contract MemoryExample {
	struct Point {
		uint256 x;
		uint256 y;
	}
	
	event MemoryPointerValue(bytes32);
	
	/// @notice Emits the value held at 0x40 before and after encoding a
	/// uint256 and a uint128 with `abi.encodePacked`.
	function inspect() external {
		// Free memory pointer before the encoding is produced.
		bytes32 fmpBefore;
		assembly {
			fmpBefore := mload(0x40)
		}
		emit MemoryPointerValue(fmpBefore); // 0x80
		
		// 0x50 (80) bytes: 32 for the length, 32 for the uint256 and
		// 16 for the uint128.
		bytes memory packed = abi.encodePacked(uint256(1), uint128(2));
		
		// Free memory pointer after the encoding is produced.
		bytes32 fmpAfter;
		assembly {
			fmpAfter := mload(0x40)
		}
		emit MemoryPointerValue(fmpAfter); // 0x80 + 0x50 = 0xd0
	}
}

Returning and Reverting

Returning values from a function call, and reverting with some data in a function call both involve storing data in memory.

/// @notice Returns two words straight from memory using the `return` op code.
contract MemoryExample {
	
	/// @return The values 1 and 2, in that order.
	function returnVals() external view returns (uint256, uint256) {
		assembly {
			// The payload is exactly 64 bytes, so it fits in the scratch
			// space (0x00-0x40) and no allocation is needed.
			mstore(0, 1)
			mstore(32, 2)
			
			// return(offset, size): hand back `size` bytes of memory
			// starting at `offset`.
			return(0, 64)
		}
	}
}
/// @notice Reverts with empty data when a condition on the caller is met.
contract MemoryExample {
	
	function revertFn() external view {
		assembly {
			// Only taken when caller() evaluates to zero.
			if iszero(caller()) {
				// revert(offset, size) takes the same arguments as return:
				// where the data starts in memory and how many bytes of it
				// to hand back. Here the revert data is empty.
				revert(0x00, 0x00)
			}
		}
	}
}

Hash functions

The keccak256 hash function hashes a bytes input of any size, and thus, we pass it a memory location and data length as arguments, to read the intended data to hash from memory.

/// @notice Hashes three words that are first written to free memory.
contract MemoryExample {
	
	/// @return The keccak256 hash of the 96 bytes holding the words 1, 2, 3.
	function hashData() external view returns (bytes32) {
		assembly {
			// Start writing where Solidity says free memory begins.
			let ptr := mload(0x40)
			
			// Lay out the three words back to back.
			mstore(ptr, 1)
			mstore(add(ptr, 0x20), 2)
			mstore(add(ptr, 0x40), 3)
			
			// Mark the 0x60 bytes just written as allocated.
			mstore(0x40, add(ptr, 0x60))
			
			// keccak256(offset, size) hashes `size` bytes of memory starting
			// at `offset`; the digest is returned through scratch space.
			let digest := keccak256(ptr, 0x60)
			mstore(0x00, digest)
			return(0x00, 0x20)
		}
	}
}

Log Events

Emitting events may involve memory when the events are emitted with non-indexed data.

/// @notice Emits events by hand with the `logN` op codes, with and without
/// non-indexed data.
contract MemoryExample {
	
	event SomeEvent1(uint256 indexed a, uint256 indexed b);
	event SomeEvent2(uint256 indexed a, bool b);
	
	/// @notice Emits `SomeEvent1(5, 6)` and `SomeEvent2(5, true)` from
	/// inline assembly.
	function emitEvent() external {
		// The first topic of a non-anonymous event is the keccak256 hash of
		// its signature.
		bytes32 someEvent1Topic = keccak256("SomeEvent1(uint256,uint256)");
		
		assembly {
			// Both arguments are indexed, so they are passed as topics and
			// the data region is empty (offset 0, size 0): no memory needed.
			log3(0, 0, someEvent1Topic, 5, 6)
		}
		
		bytes32 someEvent2Topic = keccak256("SomeEvent2(uint256,bool)");
		
		assembly {
			// `b` is not indexed, so it goes in the log data, which is read
			// from memory: write `true` as a 32-byte word into scratch space
			// and point log2 at it. The indexed `a` is passed as a topic.
			mstore(0x00, 1)
			log2(0x00, 0x20, someEvent2Topic, 5)
		}
	}
}