Busy being born.

Understanding EVM Smart Contract Storage via Yul

The storage space of a smart contract can be visualized as a map from slot numbers in the range [0, 2^256 - 1] to a 32-byte word stored at each slot.

In a Solidity smart contract, storage variables occupy slots in the order in which the variables have been serially declared.

contract StorageExample {
	// Slots are handed out in declaration order, starting at slot 0.
	// A uint256 is exactly one 32-byte word, so each variable below gets a slot of its own.
	uint256 A; // --> slot-0
	uint256 B; // --> slot-1
	uint256 C; // --> slot-2
	// ....
}

Fixed-size Data Types

Storage variables of fixed-size data types that are declared serially are packed together to occupy the same storage slot -- this is an optimization; storage is expensive.

contract StorageExample {
	// slot-0: 0x-[A-32-bytes]
	uint256 A;
	
	// slot-1: 0x-[B-16-bytes][C-16-bytes]
	uint128 B; // --> slot-1
	uint128 C; // --> slot-1
	
	// slot-2: 0x-[empty-2-bytes][F-6-bytes][E-4-bytes][D-20-bytes]
	// (20 + 4 + 6 = 30 bytes used; the first declared variable sits in the lowest-order bytes)
	address D;
	uint32 E;
	uint48 F;
	
	// slot-3: 0x-[empty-26-bytes][G-6-bytes]
	// G needs 6 bytes but only 2 bytes remain in the slot shared by D, E and F,
	// so G starts a new slot.
	uint48 G;
	
	// ....
}

Storage variables are only packed into the same slot as long as they don't exceed 32 bytes collectively; otherwise the next storage slot is used. Note how no part of G is placed in the slot shared by D, E and F, even though 2 bytes of that slot are unused: a variable is never split across two slots, so G is stored starting at the following slot.

The above rules apply to fixed-size data types of size up to 32 bytes -- uint8..256 (1-32 bytes), address (20 bytes), and bool (1 byte).

Arrays

Fixed-size arrays of these fixed-size data types are also stored similarly e.g. an array of 3 uint256 elements declared in storage will occupy 3 storage slots serially.

contract StorageExample {

	// Three full-word elements, one per slot --> slots 0, 1 and 2
	uint256[3] fixedUint256Array;
	// A 20-byte address per slot (two would not fit in 32 bytes) --> slots 3, 4 and 5
	address[3] fixedAddressArray;
	// Five 1-byte bools packed into a single word --> slot 6 only
	bool[5] fixedBoolArray;

	constructor() {
		fixedUint256Array = [20, 30, 40];
		fixedAddressArray = [address(0x1), address(0x2), address(0x3)];
		fixedBoolArray = [false, true, false, false, false];
	}

	// Reads the element at index 1 of each fixed-size array directly from storage.
	function getSecondElement()
		external
		view
		returns (uint256 retUint, address retAddress, bool retBool)
	{
		assembly {
			let i := 1

			// One element per slot: element i lives at (first slot of the array + i).
			// Expected: 30
			retUint := sload(add(fixedUint256Array.slot, i))
			// Expected: 0x00..2
			retAddress := sload(add(fixedAddressArray.slot, i))

			// All five bools share one word; element i starts (8 * i) bits
			// from the lowest-order end of that word.
			let packedBools := sload(fixedBoolArray.slot)
			let bitShift := mul(i, 8)

			// Bring element i down to the lowest byte and drop everything above it.
			// Expected: true
			retBool := and(shr(bitShift, packedBools), 0xff)
		}
	}
}

Here, fixedAddressArray occupies 3 slots since 2 address elements cannot be packed into a single 32 byte storage slot. But, note how fixedBoolArray occupies only 1 slot since 5 bool elements can be packed into a single storage slot.

Dynamic arrays work differently. A dynamic array's length is stored at the slot at which it's declared (len_slot), and its elements are stored in serially ordered slots starting at the slot keccak256(len_slot). The elements of a dynamic array are packed in storage as well -- if they can be packed.

contract StorageExample {
	
	// A dynamic array occupies exactly one slot at its declaration site (it holds the length),
	// so two consecutive dynamic arrays take two consecutive slots.
	uint256[] dynamicUint256Array; // --> slot-0
	uint64[] dynamicUint64Array; // --> slot-1
	
	constructor() {
		dynamicUint256Array = [20, 30, 40, 50, 60, 70, 80];
		dynamicUint64Array = [0, 1, 2, 3, 4, 5, 6, 7, 8];
	}
	
	// Returns the length of each array, read directly from the slot at which it is declared.
	function getLen()
		external
		view
		returns (uint256 retLenUint256, uint256 retLenUint64)
	{
		assembly {
			retLenUint256 := sload(dynamicUint256Array.slot)
			retLenUint64 := sload(dynamicUint64Array.slot)
		}
	}
	
	// Returns the element at `index` of both arrays, read directly from storage.
	// `index` must be in range for BOTH arrays, so with the constructor's data
	// (7 and 9 elements) only indices 0..6 can be queried.
	function getAtElementIndex(uint256 index)
		external
		view
		returns (uint256 retUint256, uint64 retUint64)
	{
		assert(dynamicUint256Array.length > index);
		assert(dynamicUint64Array.length > index);
		
		bytes32 slotUint256Array;
		bytes32 slotUint64Array;
		
		// `.slot` is only accessible from assembly; copy it into Solidity variables for hashing.
		assembly {
			slotUint256Array := dynamicUint256Array.slot
			slotUint64Array := dynamicUint64Array.slot
		}
		
		// Element data starts at keccak256(len_slot).
		bytes32 locationUint256Array = keccak256(abi.encode(slotUint256Array));
		bytes32 locationUint64Array = keccak256(abi.encode(slotUint64Array));
		
		assembly {
			// uint256 elements are one per slot: element `index` is at (data start + index).
			retUint256 := sload(add(locationUint256Array, index))
			
			// Every slot stores (256 bits / 64 bits) = 4 uint64 elements
			let slotIndex := div(index, 4)
			// Elements are ordered from higher (left) to lower (right) index
			// So: 0x-[elem-3][elem-2][elem-1][elem-0]
			// `offset` is the element's distance in bits from the lowest-order end of the word.
			let offset := mul(mod(index, 4), 64)
			
			let temp := sload(add(locationUint64Array, slotIndex))
			// 64-bit mask moved over the wanted element
			let mask := shl(offset, 0xffffffffffffffff)
			
			// Isolate the element, then shift it down to the lowest 64 bits.
			retUint64 := shr(offset, and(temp, mask))
		}
	}
}

Here too, note how elements are packed in storage slots whenever they can be packed. Storage slots starting at locationUint64Array = keccak256(abi.encode(slotUint64Array)) store 4 uint64 array elements per slot, and the assembly code for reading from the dynamic uint64[] array accounts for this (code that writes to the array must account for it in the same way).

Mappings

Each key-value pair in a mapping corresponds to a unique storage slot. The location of this storage slot is calculated as:

contract StorageExample {
	// `key` and `value` are placeholders for the mapping's key and value types;
	// `slot` is the slot at which the mapping itself is declared.
	// Location of myMap[key]: keccak256(abi.encode(key, slot))
	mapping(key => value) myMap; // --> slot 0
	
	// ...
}

This formula is recursive; the storage location of a (key-1, key-2, value) pair of a nested mapping is similarly calculated as:

contract StorageExample {
	// Location of myMap[key]: keccak256(abi.encode(key, slot))
	mapping(key => value) myMap; // --> slot 0
	
	// The outer key is hashed with the mapping's slot first; the inner key is then
	// hashed with that result.
	// Location of myNestedMap[key-2][key-1]:
	//   keccak256(abi.encode(key-1, keccak256(abi.encode(key-2, slot))))
	mapping(key-2 => mapping(key-1 => value)) myNestedMap; // --> slot 1
	
	// ...
}

You calculate the storage location of a (key..., value) coordinate of a mapping to read or overwrite its value.

contract StorageExample {

	// slot-0
	mapping(uint256 => address) private ownership;
	// slot-1
	mapping(address => mapping(address => bool)) private approvalForAll;

	// Returns ownership[tokenId], read directly from its storage slot.
	function getOwner(uint256 tokenId) external view returns (address ret) {
		bytes32 valueSlot = _ownershipLocation(tokenId);
		assembly {
			ret := sload(valueSlot)
		}
	}

	// Overwrites ownership[tokenId] by writing directly to its storage slot.
	function setOwner(uint256 tokenId, address newOwner) external {
		bytes32 valueSlot = _ownershipLocation(tokenId);
		assembly {
			sstore(valueSlot, newOwner)
		}
	}

	// Returns approvalForAll[owner][operator], read directly from its storage slot.
	function isApprovedForAll(address owner, address operator)
		external
		view
		returns (bool ret)
	{
		bytes32 valueSlot = _approvalLocation(owner, operator);
		assembly {
			ret := sload(valueSlot)
		}
	}

	// Overwrites approvalForAll[owner][operator] by writing directly to its storage slot.
	function setApprovalForAll
		(address owner, address operator, bool approval)
		external
	{
		bytes32 valueSlot = _approvalLocation(owner, operator);
		assembly {
			// Keep only the lowest byte of the flag before storing it
			sstore(valueSlot, and(approval, 0xff))
		}
	}

	// Storage location of ownership[tokenId]: keccak256(abi.encode(key, slot)).
	function _ownershipLocation(uint256 tokenId) private view returns (bytes32) {
		uint256 baseSlot;
		assembly {
			baseSlot := ownership.slot
		}
		return keccak256(abi.encode(tokenId, baseSlot));
	}

	// Storage location of approvalForAll[owner][operator]: the outer key is hashed
	// with the mapping's slot, then the inner key is hashed with that result.
	function _approvalLocation(address owner, address operator)
		private
		view
		returns (bytes32)
	{
		uint256 baseSlot;
		assembly {
			baseSlot := approvalForAll.slot
		}
		bytes32 ownerLevel = keccak256(abi.encode(owner, baseSlot));
		return keccak256(abi.encode(operator, ownerLevel));
	}
}

Strings

Strings are a dynamic data type. We'll categorize them as "short strings" (strictly less than 32 bytes in length) and "long strings" (greater than or equal to 32 bytes in length).

Short strings are stored just like fixed-size data types. The string data is stored at the slot at which the string storage variable is declared.

Short strings are at most 31 bytes in size. The string data is left-aligned in the slot (it starts at the highest-order byte and runs left to right), while the rightmost (lowest-order) byte stores len × 2. Since len × 2 is always even, the lowest-order bit of the slot is 0.

contract StorageExample {

	string private myString = "hello";

	// Rebuilds `myString` in memory from its raw storage word.
	// Only the short-string encoding is decoded here; if the lowest bit of the
	// word is 1 the branch below is skipped.
	function getString() external view returns (string memory ret) {
		assembly {
			// Build the result at the free memory pointer
			ret := mload(0x40)

			// Raw word: [string data, left-aligned][len * 2 in the lowest byte]
			let word := sload(myString.slot)

			// Lowest bit clear --> short string
			if iszero(and(word, 1)) {
				// Memory layout of a string: length word, then the data
				// Lowest byte holds len * 2; halve it to get the length
				mstore(ret, shr(1, and(word, 0xff)))
				// Everything except the lowest byte is string data
				mstore(add(ret, 0x20), and(word, not(0xff)))

				// Two words were written; move the free memory pointer past them
				mstore(0x40, add(ret, 0x40))
			}
		}
	}
}

If the final bit of the data stored at the string storage slot is 1, that means we're dealing with a long string of length 32 bytes or greater. For long strings, the string storage slot stores (len×2)+1 which results in the final bit being 1.

The string data of long strings is stored at the slot keccak256(original_slot) and the data is laid out from left to right, and is contiguous across storage slots (for strings exceeding 32 bytes in length).

contract StorageExample {

	string private myString = "hello world, my name is longer than 32 bytes";

	// Rebuilds `myString` in memory from raw storage, decoding either the
	// short-string or the long-string encoding depending on the lowest bit.
	function getString() external view returns (string memory ret) {

		bytes32 originalSlot;
		assembly {
			originalSlot := myString.slot
		}

		// Long-string data starts at keccak256(originalSlot)
		bytes32 dataStart = keccak256(abi.encode(originalSlot));

		assembly {
			// Build the result at the free memory pointer
			ret := mload(0x40)

			// Raw word stored at the string's own slot
			let header := sload(originalSlot)

			switch and(header, 1)
			// Lowest bit clear --> short string: data and (len * 2) share this word
			case 0 {
				// Length word, then the data word
				mstore(ret, shr(1, and(header, 0xff)))
				mstore(add(ret, 0x20), and(header, not(0xff)))

				// Move the free memory pointer past the two words written
				mstore(0x40, add(ret, 0x40))
			}
			// Lowest bit set --> long string: this word holds (len * 2) + 1
			default {
				let len := shr(1, sub(header, 1))
				mstore(ret, len)

				// Copy ceil(len / 32) words from consecutive storage slots
				// into consecutive memory words after the length.
				let src := dataStart
				let dst := add(ret, 0x20)

				for { let remaining := add(div(sub(len, 1), 0x20), 1) }
					remaining
					{ remaining := sub(remaining, 1) }
				{
					mstore(dst, sload(src))
					src := add(src, 1)
					dst := add(dst, 0x20)
				}

				// `dst` now points just past the last copied word
				mstore(0x40, dst)
			}
		}
	}
}

Structs

Structs are not a primitive data type; they are a user-defined grouping of members. A struct storage variable always starts at a new slot, and the data of its members is stored (and packed) by the same rules as above, in the serial order in which the struct members are declared.

contract StorageExample {
	struct Profile {
		// 1 slot: address (20 bytes) + uint96 (12 bytes) = 32 bytes, packed together
		address user;
		uint96 uid;
		// 1 slot: a uint256 fills a whole word
		uint256 balance;
	}
	
	// slot-0
	uint256 public totalSupply;
	// slot-1 and slot-2: (user, uid) in slot-1, balance in slot-2
	Profile public user;
}

Concluding Insight

The most powerful mental model for EVM storage is that these rules compose.

You can traverse any data structure by chaining these operations. A nested mapping is simply a hash of a hash. A struct member inside a mapping is a hash plus an offset.

slot = keccak256(abi.encode(key, baseSlot)) + offset

Regardless of how complex a data structure is—a mapping to a struct containing a dynamic array—determining a specific storage slot is always a sequence of just two fundamental operations:

  1. Add (Offset): used for fixed-size packing (Struct members, fixed arrays, packed primitives).
  2. Hash (Jump): used for dynamic sizing (Mappings, dynamic arrays, long strings).