Advanced Embedded Systems Programming with ARM Cortex-M
Embedded systems power everything from IoT devices to automotive systems. In this comprehensive guide, we'll explore advanced techniques for programming ARM Cortex-M microcontrollers, including interrupt handling, DMA, RTOS integration, and power optimization.
What Makes Embedded Programming Different?
Embedded programming requires a unique mindset:
Hardware Architecture Overview
ARM Cortex-M processors (M0 through M7) offer:
Getting Started: GPIO and Clock Configuration
Let's begin with proper initialization of an STM32 microcontroller:
#include <stdint.h>
#include <stdbool.h>
// STM32F103 Memory-mapped register definitions
#define RCC_BASE 0x40021000
#define GPIOA_BASE 0x40010800
#define NVIC_BASE 0xE000E100
#define EXTI_BASE 0x40010400
// Register structures for cleaner code
typedef struct {
volatile uint32_t CR; // Clock control register
volatile uint32_t CFGR; // Clock configuration register
volatile uint32_t CIR; // Clock interrupt register
volatile uint32_t APB2RSTR;
volatile uint32_t APB1RSTR;
volatile uint32_t AHBENR;
volatile uint32_t APB2ENR; // APB2 peripheral clock enable
volatile uint32_t APB1ENR;
} RCC_TypeDef;
typedef struct {
volatile uint32_t CRL; // Port configuration register low
volatile uint32_t CRH; // Port configuration register high
volatile uint32_t IDR; // Input data register
volatile uint32_t ODR; // Output data register
volatile uint32_t BSRR; // Bit set/reset register
volatile uint32_t BRR; // Bit reset register
volatile uint32_t LCKR; // Port configuration lock register
} GPIO_TypeDef;
#define RCC ((RCC_TypeDef*)RCC_BASE)
#define GPIOA ((GPIO_TypeDef*)GPIOA_BASE)
// System initialization with proper clock setup
void system_init(void) {
// Enable HSE (High Speed External oscillator)
RCC->CR |= (1 << 16); // HSEON
while(!(RCC->CR & (1 << 17))); // Wait for HSERDY
// Configure PLL: HSE as source, multiply by 9 (8MHz * 9 = 72MHz)
RCC->CFGR |= (1 << 16); // PLLSRC = HSE
RCC->CFGR |= (7 << 18); // PLLMUL = 9
// Enable PLL
RCC->CR |= (1 << 24); // PLLON
while(!(RCC->CR & (1 << 25))); // Wait for PLLRDY
// Set PLL as system clock
RCC->CFGR |= (2 << 0); // SW = PLL
while((RCC->CFGR & (3 << 2)) != (2 << 2)); // Wait for PLL to be used
// Configure flash latency for 72MHz
FLASH->ACR |= (2 << 0); // 2 wait states
}
// Precise microsecond delay using DWT
void delay_us(uint32_t us) {
uint32_t start = DWT->CYCCNT;
uint32_t cycles = us * (SystemCoreClock / 1000000);
while((DWT->CYCCNT - start) < cycles);
}
Advanced Interrupt Handling with NVIC
Interrupts enable real-time response to hardware events:
// External interrupt configuration for button on PA0
void exti_init(void) {
// Enable GPIOA and AFIO clocks
RCC->APB2ENR |= (1 << 2) | (1 << 0); // IOPAEN | AFIOEN
// Configure PA0 as input with pull-up
GPIOA->CRL &= ~(0xF << 0);
GPIOA->CRL |= (0x8 << 0); // Input with pull-up/pull-down
GPIOA->ODR |= (1 << 0); // Enable pull-up
// Connect EXTI0 to PA0
AFIO->EXTICR[0] &= ~(0xF << 0);
AFIO->EXTICR[0] |= (0x0 << 0); // PA0
// Configure EXTI0 for falling edge
EXTI->FTSR |= (1 << 0); // Falling trigger
EXTI->IMR |= (1 << 0); // Unmask interrupt
// Enable EXTI0 interrupt in NVIC
NVIC->ISER[0] |= (1 << 6); // Position 6 for EXTI0
NVIC->IP[6] = (2 << 6); // Priority 2
}
// Interrupt handler with debouncing
volatile uint32_t button_last_time = 0;
#define DEBOUNCE_MS 50
void EXTI0_IRQHandler(void) {
if (EXTI->PR & (1 << 0)) { // Check pending bit
uint32_t current_time = systick_get_ms();
if ((current_time - button_last_time) > DEBOUNCE_MS) {
button_last_time = current_time;
// Handle button press
toggle_led();
}
EXTI->PR |= (1 << 0); // Clear pending bit
}
}
DMA for Efficient Data Transfer
DMA enables high-speed data transfer without CPU intervention:
// DMA configuration for ADC continuous conversion
typedef struct {
volatile uint32_t CCR; // Channel configuration register
volatile uint32_t CNDTR; // Number of data register
volatile uint32_t CPAR; // Peripheral address register
volatile uint32_t CMAR; // Memory address register
} DMA_Channel_TypeDef;
#define DMA1_Channel1 ((DMA_Channel_TypeDef*)0x40020008)
uint16_t adc_buffer[1024]; // Buffer for ADC samples
void dma_adc_init(void) {
// Enable DMA1 clock
RCC->AHBENR |= (1 << 0);
// Configure DMA for ADC1
DMA1_Channel1->CPAR = (uint32_t)&ADC1->DR; // Source: ADC data register
DMA1_Channel1->CMAR = (uint32_t)adc_buffer; // Destination: buffer
DMA1_Channel1->CNDTR = 1024; // Number of transfers
DMA1_Channel1->CCR = 0;
DMA1_Channel1->CCR |= (1 << 10); // Memory increment mode
DMA1_Channel1->CCR |= (1 << 7); // Memory increment enable
DMA1_Channel1->CCR |= (1 << 5); // Circular mode
DMA1_Channel1->CCR |= (1 << 8); // Peripheral size 16-bit
DMA1_Channel1->CCR |= (1 << 10); // Memory size 16-bit
DMA1_Channel1->CCR |= (2 << 12); // High priority
DMA1_Channel1->CCR |= (1 << 1); // Transfer complete interrupt
// Enable DMA channel
DMA1_Channel1->CCR |= (1 << 0);
// Enable DMA interrupt in NVIC
NVIC->ISER[0] |= (1 << 11); // DMA1_Channel1
}
// DMA interrupt handler
void DMA1_Channel1_IRQHandler(void) {
if (DMA1->ISR & (1 << 1)) { // Transfer complete flag
// Process ADC buffer
process_adc_data(adc_buffer, 1024);
DMA1->IFCR |= (1 << 1); // Clear flag
}
}
RTOS Integration with FreeRTOS
For complex applications, an RTOS provides structured multitasking:
#include "FreeRTOS.h"
#include "task.h"
#include "queue.h"
#include "semphr.h"
// Task handles and synchronization
TaskHandle_t sensor_task_handle;
TaskHandle_t control_task_handle;
QueueHandle_t sensor_queue;
SemaphoreHandle_t spi_mutex;
// Sensor reading task
void sensor_task(void* params) {
TickType_t last_wake_time = xTaskGetTickCount();
sensor_data_t data;
while(1) {
// Read sensors every 100ms
data.temperature = read_temperature();
data.humidity = read_humidity();
data.pressure = read_pressure();
data.timestamp = xTaskGetTickCount();
// Send to queue
if (xQueueSend(sensor_queue, &data, 10) != pdTRUE) {
// Handle queue full
error_handler(ERROR_QUEUE_FULL);
}
// Precise timing
vTaskDelayUntil(&last_wake_time, pdMS_TO_TICKS(100));
}
}
// Control task with priority
void control_task(void* params) {
sensor_data_t data;
while(1) {
// Wait for sensor data
if (xQueueReceive(sensor_queue, &data, portMAX_DELAY) == pdTRUE) {
// Take mutex before SPI access
if (xSemaphoreTake(spi_mutex, pdMS_TO_TICKS(100)) == pdTRUE) {
// Process control algorithm
float output = pid_controller(data.temperature, setpoint);
set_actuator(output);
xSemaphoreGive(spi_mutex);
}
}
}
}
// RTOS initialization
void rtos_init(void) {
// Create queue for sensor data
sensor_queue = xQueueCreate(10, sizeof(sensor_data_t));
// Create mutex for SPI bus
spi_mutex = xSemaphoreCreateMutex();
// Create tasks with priorities
xTaskCreate(sensor_task, "Sensor", 256, NULL, 2, &sensor_task_handle);
xTaskCreate(control_task, "Control", 512, NULL, 3, &control_task_handle);
// Start scheduler
vTaskStartScheduler();
}
Power Management Techniques
Minimize power consumption for battery-powered devices:
// Low-power modes configuration
void enter_sleep_mode(void) {
// Configure all unused pins as analog input (lowest power)
GPIOA->CRL = 0x00000000;
GPIOB->CRL = 0x00000000;
// Disable unused peripherals
RCC->APB2ENR &= ~((1<<3)|(1<<4)|(1<<5)); // Disable GPIOB,C,D clocks
// Enter sleep mode
SCB->SCR &= ~(1 << 2); // Clear SLEEPDEEP
__WFI(); // Wait for interrupt
}
// Stop mode with RTC wakeup
void enter_stop_mode(uint32_t seconds) {
// Configure RTC alarm for wakeup
RTC->CRL |= (1 << 4); // Enter config mode
while(!(RTC->CRL & (1 << 5))); // Wait for RTOFF
RTC->ALRH = (seconds >> 16) & 0xFFFF;
RTC->ALRL = seconds & 0xFFFF;
RTC->CRL &= ~(1 << 4); // Exit config mode
while(!(RTC->CRL & (1 << 5)));
// Configure EXTI Line 17 (RTC Alarm)
EXTI->IMR |= (1 << 17);
EXTI->RTSR |= (1 << 17);
// Enter STOP mode
PWR->CR |= (1 << 1); // Clear PDDS bit
PWR->CR |= (1 << 0); // LPDS = 1, regulator in low-power
SCB->SCR |= (1 << 2); // Set SLEEPDEEP
__WFI();
// Restore clocks after wakeup
system_init();
}
// Dynamic frequency scaling
void set_cpu_frequency(cpu_freq_t freq) {
switch(freq) {
case CPU_FREQ_LOW: // 8MHz from HSI
RCC->CFGR = (RCC->CFGR & ~3) | 0; // HSI as system clock
break;
case CPU_FREQ_MED: // 36MHz from PLL
configure_pll(36);
break;
case CPU_FREQ_HIGH: // 72MHz from PLL
configure_pll(72);
break;
}
}
Advanced Communication Protocols
SPI Master Implementation
// Hardware SPI with DMA
void spi_init_master(void) {
// Enable clocks
RCC->APB2ENR |= (1 << 12) | (1 << 2); // SPI1 and GPIOA
// Configure pins: PA5=SCK, PA6=MISO, PA7=MOSI
GPIOA->CRL &= ~(0xFFF << 20);
GPIOA->CRL |= (0xB8B << 20); // Alt function push-pull, Input floating
// Configure SPI
SPI1->CR1 = 0;
SPI1->CR1 |= (3 << 3); // Baud rate = fPCLK/16
SPI1->CR1 |= (1 << 2); // Master mode
SPI1->CR1 |= (0 << 1); // CPOL = 0
SPI1->CR1 |= (1 << 0); // CPHA = 1
SPI1->CR1 |= (1 << 9); // Software NSS
SPI1->CR1 |= (1 << 8); // SSI = 1
SPI1->CR2 |= (1 << 1); // TX DMA enable
SPI1->CR2 |= (1 << 0); // RX DMA enable
SPI1->CR1 |= (1 << 6); // Enable SPI
}
I2C with Error Handling
// Robust I2C communication
i2c_status_t i2c_write_register(uint8_t device_addr, uint8_t reg_addr,
uint8_t* data, uint8_t len) {
uint32_t timeout = I2C_TIMEOUT;
// Generate START
I2C1->CR1 |= (1 << 8);
while(!(I2C1->SR1 & (1 << 0)) && --timeout); // Wait for SB
if (!timeout) return I2C_TIMEOUT_ERROR;
// Send device address + W
I2C1->DR = (device_addr << 1) | 0;
while(!(I2C1->SR1 & (1 << 1)) && --timeout); // Wait for ADDR
if (!timeout) return I2C_TIMEOUT_ERROR;
// Clear ADDR by reading SR2
(void)I2C1->SR2;
// Send register address
I2C1->DR = reg_addr;
while(!(I2C1->SR1 & (1 << 7)) && --timeout); // Wait for TxE
// Send data
for (uint8_t i = 0; i < len; i++) {
I2C1->DR = data[i];
while(!(I2C1->SR1 & (1 << 7)) && --timeout);
}
// Generate STOP
I2C1->CR1 |= (1 << 9);
return I2C_SUCCESS;
}
Memory Management Best Practices
// Static memory pools instead of malloc
typedef struct {
uint8_t buffer[BUFFER_SIZE];
bool in_use;
} buffer_t;
static buffer_t buffer_pool[POOL_SIZE];
void* pool_alloc(void) {
for (int i = 0; i < POOL_SIZE; i++) {
if (!buffer_pool[i].in_use) {
buffer_pool[i].in_use = true;
return buffer_pool[i].buffer;
}
}
return NULL; // Pool exhausted
}
void pool_free(void* ptr) {
buffer_t* buf = container_of(ptr, buffer_t, buffer);
buf->in_use = false;
}
Advanced Debugging Techniques
// Debug output via SWO (Serial Wire Output)
void swo_init(uint32_t baudrate) {
// Enable TPIU clock
RCC->APB2ENR |= (1 << 14);
// Configure SWO pin (PB3)
GPIOB->CRL &= ~(0xF << 12);
GPIOB->CRL |= (0xB << 12); // Alt function push-pull
// Configure ITM
ITM->LAR = 0xC5ACCE55; // Unlock
ITM->TCR = (1 << 0); // Enable ITM
ITM->TER = 0xFFFFFFFF; // Enable all channels
// Configure TPIU
TPIU->SPPR = 2; // NRZ protocol
TPIU->ACPR = SystemCoreClock / baudrate - 1;
}
// Printf-style debugging over SWO
int _write(int file, char *ptr, int len) {
for (int i = 0; i < len; i++) {
while (!(ITM->PORT[0].u32 & 1));
ITM->PORT[0].u8 = ptr[i];
}
return len;
}
// Hardware fault handlers with context
void HardFault_Handler(void) {
// Get fault status registers
uint32_t cfsr = SCB->CFSR;
uint32_t hfsr = SCB->HFSR;
uint32_t mmar = SCB->MMFAR;
uint32_t bfar = SCB->BFAR;
// Save context for debugging
__asm volatile (
"tst lr, #4 \n"
"ite eq \n"
"mrseq r0, msp \n"
"mrsne r0, psp \n"
"b hard_fault_handler_c \n"
);
}
Production-Ready Patterns
// Watchdog timer for reliability
void watchdog_init(uint16_t timeout_ms) {
// Enable IWDG clock
RCC->CSR |= (1 << 0);
// Unlock IWDG registers
IWDG->KR = 0x5555;
// Set prescaler and reload value
IWDG->PR = 4; // Prescaler = 64
IWDG->RLR = (timeout_ms * 40) / 64; // 40kHz LSI clock
// Start watchdog
IWDG->KR = 0xCCCC;
}
// Error handling with diagnostic info
typedef enum {
ERROR_NONE = 0,
ERROR_MEMORY,
ERROR_COMMUNICATION,
ERROR_TIMEOUT,
ERROR_HARDWARE
} error_code_t;
void error_handler(error_code_t error) {
// Save error info to backup registers
RTC->BKP0R = error;
RTC->BKP1R = SystemCoreClock;
RTC->BKP2R = get_stack_pointer();
// Signal error via LED pattern
while(1) {
blink_error_pattern(error);
}
}
Conclusion
Mastering embedded systems programming requires deep understanding of both hardware and software:
Technical Achievements in Our Projects:
Advanced Techniques Covered:
Key Design Patterns:
Performance Metrics:
The techniques demonstrated here have been battle-tested in production systems ranging from wearable devices to industrial controllers. Understanding these concepts enables development of efficient, reliable embedded systems that can run for years on a single battery.
About the Author
The Nordic Oculus team brings together skills in modern software development, cloud architecture, and emerging technologies.
Learn more about our team