Azure Event Grid esp-mqtt-arduino Client – Finding fail

I still couldn’t figure out why my code was failing, so I built a test harness that connected to the Wi-Fi, set the time with the Network Time Protocol (NTP), established a Transport Layer Security (TLS) connection with the Azure Event Grid MQTT Broker, and finally authenticated using client certificate authentication. Basically, it was “The joy of certs” without the Arduino PubSubClient library and with authentication.

/*
  Azure Event Grid MQTT Endpoint Probe with mTLS
  - Wi-Fi connect
  - SNTP time sync
  - DNS resolve
  - TCP reachability (port 8883)
  - TLS (server-only) handshake using CRT bundle (or custom CA)
  - TLS (mTLS) handshake with client certificate & private key

  Notes:
    - Client certificate must be PEM and match private key.
    - Private key must be PEM and UNENCRYPTED (no passphrase).
    - SNI uses HOSTNAME automatically; do NOT use raw IP.
*/
#include <Arduino.h>
#include <WiFi.h>
#include <WiFiClient.h>
#include <WiFiClientSecure.h>

#include <../constants.h>
#include <../secrets.h>

extern "C" {
  #include <lwip/netdb.h>
  #include <lwip/sockets.h>
  #include <lwip/inet.h>
  #include <lwip/errno.h>
  #include <time.h>
}
// Broker host name passed to every probe (DNS, TCP, TLS, mTLS).
// NOTE(review): the value looks like a placeholder - replace with the real namespace host.
static const char* HOSTNAME  = "ThisIsNotTheMQTTBrokerYouAreLookingFor.newzealandnorth-1.ts.eventgrid.azure.net";
// TCP port used by the TCP, TLS and mTLS probes.
static const uint16_t PORT   = 8883;

// Time servers (for TLS validity window); both are handed to configTime() in syncTime().
static const char* NTP_1 = "pool.ntp.org";
static const char* NTP_2 = "time.cloudflare.com";

// Translate an errno value into its symbolic name for log output.
//
// The cases use the <errno.h> macros instead of literal numbers because the
// numeric values are platform-specific: the Linux numbering (e.g. 110 for
// ETIMEDOUT, 113 for EHOSTUNREACH) does not match every C library, so literal
// numbers can attach the wrong label to a failure on the target board.
//
// e       errno value captured right after a failed socket/TLS call.
// returns a static string with the symbolic name, or "?" when the value is
//         not one of the codes this probe knows about.
static const char* errnoName(int e) {
  switch (e) {
    case EIO:          return "EIO";
    case ENETUNREACH:  return "ENETUNREACH";
    case ECONNRESET:   return "ECONNRESET";
    case ECONNABORTED: return "ECONNABORTED";
    case ETIMEDOUT:    return "ETIMEDOUT";
    case ECONNREFUSED: return "ECONNREFUSED";
    case EHOSTUNREACH: return "EHOSTUNREACH";
    case ENOTCONN:     return "ENOTCONN";
    default:           return "?";
  }
}


// Start a Wi-Fi connection with WIFI_SSID / WIFI_PASSWORD and poll until the
// station reports WL_CONNECTED or timeout_ms has elapsed.
// Prints one '.' per 250 ms poll. Returns true when connected on exit.
bool waitForWifi(uint32_t timeout_ms = 20000) {
  const uint32_t startedAt = millis();
  Serial.printf("[WiFi] Connecting to '%s'...\n", WIFI_SSID);
  WiFi.begin(WIFI_SSID, WIFI_PASSWORD);

  for (;;) {
    if (WiFi.status() == WL_CONNECTED) {
      break;
    }
    // Unsigned subtraction keeps the elapsed time correct across millis() rollover.
    if ((millis() - startedAt) >= timeout_ms) {
      break;
    }
    delay(250);
    Serial.print(".");
  }

  Serial.println();
  const bool connected = (WiFi.status() == WL_CONNECTED);
  return connected;
}


// Configure SNTP with NTP_1 / NTP_2 (UTC, no DST offset) and poll the clock
// up to 20 times, 500 ms apart, until it reads later than 1 Jan 2021.
// Never blocks forever: on timeout it logs a warning and returns anyway.
void syncTime() {
  constexpr int kMaxPolls = 20;

  configTime(0, 0, NTP_1, NTP_2);
  Serial.println("[NTP] Syncing time...");

  int polls = 0;
  while (polls < kMaxPolls) {
    const time_t current = time(nullptr);
    if (current > 1609459200) { // > Jan 1, 2021 => clock has been set
      Serial.printf("[NTP] OK (unix=%ld)\n", (long)current);
      return;
    }
    delay(500);
    ++polls;
  }

  Serial.println("[NTP] Time sync may have failed; continuing.");
}

// Resolve `host` to an IPv4 address and write its dotted-quad text form into
// `outIp`, logging each step to Serial.
//
// host    host name to resolve.
// outIp   caller-supplied buffer of at least 16 bytes that receives the
//         NUL-terminated address text.
// returns true only when resolution AND the text conversion both succeeded;
//         false otherwise (a "[DNS] FAILED" line is logged).
bool probeDNS(const char* host, char outIp[16]) {
  struct addrinfo hints = {};
  hints.ai_family = AF_INET; // IPv4 only: outIp is sized for a dotted-quad string
  struct addrinfo* res = nullptr;

  Serial.printf("[DNS] Resolving %s...\n", host);
  const int rc = getaddrinfo(host, nullptr, &hints, &res);
  Serial.printf("[DNS] getaddrinfo rc=%d\n", rc);
  if (rc != 0 || !res) {
    Serial.println("[DNS] FAILED");
    return false;
  }

  struct sockaddr_in* sin = reinterpret_cast<struct sockaddr_in*>(res->ai_addr);
  // inet_ntop() reports failure through a null return; do not claim success
  // with an unconverted buffer.
  const bool converted =
      (sin != nullptr) && (inet_ntop(AF_INET, &sin->sin_addr, outIp, 16) != nullptr);
  // The result list is no longer needed once the address has been copied out.
  freeaddrinfo(res);

  if (!converted) {
    Serial.println("[DNS] FAILED");
    return false;
  }
  Serial.printf("[DNS] %s -> %s\n", host, outIp);
  return true;
}


bool probeTCP(const char* host, uint16_t port, uint32_t timeout_ms = 5000) {
  WiFiClient cli;
  cli.setTimeout(timeout_ms);
  Serial.printf("[TCP] Connecting to %s:%u ...\n", host, port);
  if (!cli.connect(host, port)) {
    Serial.printf("[TCP] connect() FAILED\n");
    return false;
  }
  Serial.println("[TCP] Connected (no TLS). Closing (probe only).");
  cli.stop();
  return true;
}


bool probeTLS(const char* host, uint16_t port, uint32_t timeout_ms = 7000) {
  WiFiClientSecure tls;
  tls.setTimeout(timeout_ms);

  tls.setCACert(CA_ROOT_PEM);  

  Serial.printf("[TLS] Handshake to %s:%u ...\n", host, port);
  if (!tls.connect(host, port)) {
    int e = errno;
    Serial.printf("[TLS] connect() FAILED errno=%d (%s)\n", e, errnoName(e));
    return false;
  }
  Serial.println("[TLS] Handshake OK (server-only TLS)");
  tls.stop();
  return true;
}

bool probeMTLS(const char* host, uint16_t port, uint32_t timeout_ms = 8000) {
  WiFiClientSecure tls;
  tls.setTimeout(timeout_ms);

  tls.setCACert(CA_ROOT_PEM);
  tls.setCertificate(CLIENT_CERT_PEM);
  tls.setPrivateKey(CLIENT_KEY_PEM);

  Serial.printf("[mTLS] Handshake to %s:%u with client cert ...\n", host, port);
  if (!tls.connect(host, port)) {
    int e = errno;
    Serial.printf("[mTLS] connect() FAILED errno=%d (%s)\n", e, errnoName(e));
    Serial.println("[mTLS] If errno=ETIMEDOUT/ECONNRESET, server may be closing due to cert policy mismatch.");
    return false;
  }
  Serial.println("[mTLS] Handshake OK (client authenticated)");
  tls.stop();
  return true;
}

// Run the whole probe sequence once at boot: Wi-Fi, time sync, DNS, TCP,
// server-only TLS and mTLS, then print a per-stage summary.
// Every stage runs regardless of earlier failures so the summary is complete.
void setup() {
  Serial.begin(9600);
  delay(5000); // NOTE(review): presumably gives the serial monitor time to attach - confirm
  Serial.println();
  Serial.println("==== Azure Event Grid MQTT Probe (mTLS) ====");

  WiFi.mode(WIFI_STA);

  const bool wifiUp = waitForWifi();
  if (wifiUp) {
    Serial.printf("[WiFi] Connected. IP=%s  RSSI=%d dBm\n",
                  WiFi.localIP().toString().c_str(), WiFi.RSSI());
  } else {
    Serial.println("[WiFi] FAILED to connect within timeout");
  }

  // Certificate validity checks need a sane wall clock.
  syncTime();

  // Each probe logs its own details; only the verdicts are kept here.
  char resolvedIp[16] = {0};
  const bool dnsOk  = probeDNS(HOSTNAME, resolvedIp);
  const bool tcpOk  = probeTCP(HOSTNAME, PORT);
  const bool tlsOk  = probeTLS(HOSTNAME, PORT);   // server-only TLS
  const bool mtlsOk = probeMTLS(HOSTNAME, PORT);  // TLS with client cert/key

  const auto verdict = [](bool ok) { return ok ? "OK" : "FAILED"; };

  Serial.println("==== Summary ====");
  Serial.printf("DNS:  %s\n", verdict(dnsOk));
  Serial.printf("TCP:  %s\n", verdict(tcpOk));
  Serial.printf("TLS:  %s\n", verdict(tlsOk));
  Serial.printf("mTLS: %s\n", verdict(mtlsOk));
  Serial.println("=================");

  Serial.println("If mTLS=FAILED, check: correct cert/key pair, chain/trust CA, and namespace mTLS policy.");
}

// Nothing to do after the one-shot probe in setup(); just idle.
void loop() {
  constexpr uint32_t kIdleDelayMs = 1000;
  delay(kIdleDelayMs);
}

The test harness worked, which meant the issue was with my “re-factoring” of the BasicMqtt5_cert example.

Azure Event Grid esp-mqtt-arduino Client – Hours of fail

I wanted to get other Arduino-based clients (e.g. my SeeedStudio XiaoESP32S3) working with the Azure Event Grid MQTT Broker (for MQTT 5 support), so I installed the esp-mqtt-arduino library.

The library doesn’t support client authentication with certificates, so I added two methods, setClientCert and setClientKey, to the esp-mqtt-arduino.h and esp-mqtt-arduino.cpp files.

// MQTT client wrapper (excerpt - the remaining members are elided).
class Mqtt5ClientESP32 {
   public:
   Mqtt5ClientESP32();
   ~Mqtt5ClientESP32();
//... (other public members omitted from this excerpt)
  void useCrtBundle(bool enable = true);
  void setCACert(const char* cert, size_t len = 0);
  // Added for mTLS: client certificate to present to the broker.
  // len == 0 means the length is derived as strlen(cert) + 1.
  void setClientCert(const char* cert, size_t len = 0);
  // Added for mTLS: private key belonging to the client certificate.
  // len == 0 means the length is derived as strlen(key) + 1.
  void setClientKey(const char* key, size_t len = 0);  
  void setInsecure(bool enable = true);
  void setKeepAlive(uint16_t seconds);
private:
// Store the client certificate used for certificate-based authentication.
//
// cert  certificate buffer; only the pointer is stored (no copy is made
//       here). Passing nullptr clears the certificate and its length.
// len   buffer length in bytes; 0 means "derive it" as strlen(cert) + 1,
//       i.e. including the terminating NUL.
//
// Also switches off insecure mode and re-enables the common-name check.
void Mqtt5ClientESP32::setClientCert(const char* cert, size_t len)
{
  size_t effectiveLen = 0;
  if (cert != nullptr) {
    effectiveLen = (len != 0) ? len : strlen(cert) + 1;
  }

  cfg_.credentials.authentication.certificate = cert;
  cfg_.credentials.authentication.certificate_len = effectiveLen;
  cfg_.broker.verification.skip_cert_common_name_check = false;
  insecure_ = false;
}

// Store the private key that belongs to the client certificate.
//
// key   key buffer; only the pointer is stored (no copy is made here).
//       Passing nullptr clears the key and its length.
// len   buffer length in bytes; 0 means "derive it" as strlen(key) + 1,
//       i.e. including the terminating NUL.
//
// Also switches off insecure mode and re-enables the common-name check.
void Mqtt5ClientESP32::setClientKey(const char* key, size_t len)
{
  size_t effectiveLen = 0;
  if (key != nullptr) {
    effectiveLen = (len != 0) ? len : strlen(key) + 1;
  }

  cfg_.credentials.authentication.key = key;
  cfg_.credentials.authentication.key_len = effectiveLen;
  cfg_.broker.verification.skip_cert_common_name_check = false;
  insecure_ = false;
}

I had started with the basic_mqtt5_cert example, stripping it back to the bare minimum and hacking out all the certificate bundle support, etc.

#include <WiFi.h>
#include <esp-mqtt-arduino.h>
#include <esp_log.h>
#include "sdkconfig.h"
#include "../secrets.h"
#include "../constants.h"

// Single MQTT client instance shared by setup() and loop().
Mqtt5ClientESP32 mqtt;

// Set in the onConnected callback, cleared in onDisconnected; loop() only publishes while true.
// NOTE(review): volatile presumably because the callbacks run outside loop() - confirm.
volatile bool mqttReady = false;
// Not referenced anywhere in the visible sketch.
volatile bool mqttSubscribed = false;
// Bring the board up and start the MQTT session:
//   1. serial + ESP-IDF log levels
//   2. Wi-Fi connect (blocks until connected)
//   3. SNTP time sync (blocks until the clock has been set)
//   4. MQTT client configuration with CA, client certificate and key
//   5. callback registration and connection start
void setup() {
  Serial.begin(9600);
  delay(5000);
  Serial.setDebugOutput(true);
  Serial.println("[BOOT] Starting MQTT5 demo");

  esp_log_level_set("*", ESP_LOG_INFO);
  esp_log_level_set("MQTT_CLIENT", ESP_LOG_VERBOSE);

  WiFi.onEvent([](WiFiEvent_t event, WiFiEventInfo_t info){
    (void)info;
    Serial.printf("[WiFi event] id=%d\n", event);
  });

  Serial.printf("[WiFi] Connecting to %s\n", WIFI_SSID);
  WiFi.begin(WIFI_SSID, WIFI_PASSWORD);

  // Plain unsigned so the logged attempt number does not wrap back to 0
  // after 255 polls the way an 8-bit counter would.
  unsigned attempts = 0;
  while (WiFi.status() != WL_CONNECTED) {
    Serial.printf("[WiFi] status=%d attempt=%u\n", WiFi.status(), attempts++);
    delay(500);
  }
  Serial.print("[WiFi] Connected, IP: ");
  Serial.println(WiFi.localIP());

  // Sync time for TLS
  // ('[' needs no escaping in a C++ string literal; "\[" is not a standard escape.)
  Serial.println("[NTP] synchronising");
  configTime(0, 0, "pool.ntp.org", "time.nist.gov");
  Serial.print("*");
  while (time(nullptr) < 100000) {
    delay(500);
    Serial.print("*");
  }
  Serial.println("[NTP]  synchronised");

  Serial.printf("[MQTT] Init broker %s as %s\n", MQTT_SERVER_URL,MQTT_CLIENTID);
  mqtt.begin(MQTT_SERVER_URL, MQTT_CLIENTID);
  mqtt.setKeepAlive(45);

  // TLS material is applied after begin(), which clears the CA certificate settings.
  mqtt.setCACert(CA_ROOT_PEM);
  mqtt.setClientCert(CLIENT_CERT_PEM);
  mqtt.setClientKey(CLIENT_KEY_PEM);
  mqtt.setInsecure(false);

  mqtt.onMessage([](const char* topic, size_t topic_len, const uint8_t* data, size_t len){
    // Topic and payload come with explicit lengths, so print them with %.*s.
    Serial.printf("[MSG] %.*s => %.*s\n", (int)topic_len, topic, (int)len, (const char*)data);
  });
  mqtt.onConnected([]{
    Serial.println("[MQTT] Connected event");
    mqttReady = true;
    Serial.println("[MQTT] Subscribing to ssl/mqtt5");
    if (mqtt.subscribe("ssl/mqtt5", 1, true)) {
      Serial.println("[MQTT] Subscribe request sent");
    } else {
      Serial.println("[MQTT] Subscribe request failed");
    }
  });

  mqtt.onDisconnected([]{
    Serial.println("[MQTT] Disconnected event");
    mqttReady = false;
  });

  Serial.println("[MQTT] Connecting...");
  if (!mqtt.connect()) {
    Serial.println("[MQTT] Connect start failed");
  }
}

// While the MQTT session is up, publish a fixed demo message to
// MQTT_TOPIC_PUBLISH once every 60 s; otherwise just idle in 10 ms steps.
void loop() {
  static unsigned long lastPublishMs = 0;
  const unsigned long tick = millis();

  // Unsigned subtraction keeps the interval correct across millis() rollover.
  const bool intervalElapsed = (tick - lastPublishMs) >= 60000;

  if (mqttReady && intervalElapsed) {
    const char* payload = "Hello from Arduino MQTT5 ESP32!";
    Serial.println("[MQTT] Publishing demo message");

    const bool queued =
        mqtt.publish(MQTT_TOPIC_PUBLISH, (const uint8_t*)payload, strlen(payload));
    if (queued) {
      Serial.println("[MQTT] Publish queued (next in ~60s)");
    } else {
      Serial.println("[MQTT] Publish failed");
    }

    // The timer restarts whether or not the publish was accepted.
    lastPublishMs = tick;
  }

  delay(10);
}

It was important to put the setClientCert and setClientKey calls after mqtt.begin because it resets the configuration.

// Prepare the client configuration for a new session.
//
// uri        broker URI, stored as-is.
// client_id  optional client id; left untouched when null.
// user       optional username; left untouched when null.
// pass       optional password; left untouched when null.
// use_v5     selects MQTT 5 when the build enables it; ignored otherwise.
//
// Besides the connection identity this clears the CA certificate settings,
// re-enables the common-name check and installs a fixed last-will message.
void Mqtt5ClientESP32::begin(const char* uri, const char* client_id,
                             const char* user, const char* pass, bool use_v5) {
  // Session state
  connected_ = false;
  insecure_ = false;

  // Broker address and optional credentials
  cfg_.broker.address.uri = uri;
  if (client_id != nullptr) {
    cfg_.credentials.client_id = client_id;
  }
  if (user != nullptr) {
    cfg_.credentials.username = user;
  }
  if (pass != nullptr) {
    cfg_.credentials.authentication.password = pass;
  }

  // Server verification: no global store, no CA certificate, CN check on.
  cfg_.broker.verification.use_global_ca_store = false;
  cfg_.broker.verification.certificate = nullptr;
  cfg_.broker.verification.certificate_len = 0;
  cfg_.broker.verification.skip_cert_common_name_check = false;

  // Last-will message published by the broker if this client drops off.
  cfg_.session.last_will.topic  = "devices/esp32/lwt";
  cfg_.session.last_will.msg    = "offline";
  cfg_.session.last_will.qos    = 1;
  cfg_.session.last_will.retain = true;

#if CONFIG_MQTT_PROTOCOL_5
  cfg_.session.protocol_ver = use_v5 ? MQTT_PROTOCOL_V_5 : MQTT_PROTOCOL_V_3_1_1;
#else
  cfg_.session.protocol_ver = MQTT_PROTOCOL_V_3_1_1;
  (void)use_v5;  // MQTT v5 support disabled at build time
#endif
}

I tried increasing the log levels to get more debugging information, adding delays on startup to make it easier to see what was going on, and trying different protocol support options.

After hours of trying I gave up.