#!/usr/bin/env python
"""
CSV to AWS IoT MQTT Publisher for Air Monitor Data (Standalone Version)

This is a standalone version with embedded certificates.
No need for the terraair-esp32 repository.

Reads CSV files from the Air Monitor and publishes each row as a JSON message
to AWS IoT MQTT in the same format as the device would.

Topic format: AM/AM-####/sensors

Usage:
    python csv_to_mqtt_standalone.py --csv AM-6041_1735689600.csv
    python csv_to_mqtt_standalone.py --dir ./csv_files
"""

import argparse
import csv
import json
import os
import shutil
import sys
import tempfile
import time
from datetime import datetime
from pathlib import Path

try:
    from awscrt import mqtt
    from awsiot import mqtt_connection_builder
except ImportError:
    print("Error: AWS IoT SDK not installed.")
    print("Install with: pip install awsiotsdk")
    sys.exit(1)


# ============================================================================
# EMBEDDED CERTIFICATES
# ============================================================================
# These certificates are embedded directly in the script for portability
# Last Updated: January 20, 2026

# CA bundle handed to the TLS stack (ca_filepath in connect()) to
# authenticate the AWS IoT endpoint. Several PEM certificates concatenated.
AWS_CA_BUNDLE = """-----BEGIN CERTIFICATE-----
MIIDQTCCAimgAwIBAgITBmyfz5m/jAo54vB4ikPmljZbyjANBgkqhkiG9w0BAQsF
ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
b24gUm9vdCBDQSAxMB4XDTE1MDUyNjAwMDAwMFoXDTM4MDExNzAwMDAwMFowOTEL
MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJv
b3QgQ0EgMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBALJ4gHHKeNXj
ca9HgFB0fW7Y14h29Jlo91ghYPl0hAEvrAIthtOgQ3pOsqTQNroBvo3bSMgHFzZM
9O6II8c+6zf1tRn4SWiw3te5djgdYZ6k/oI2peVKVuRF4fn9tBb6dNqcmzU5L/qw
IFAGbHrQgLKm+a/sRxmPUDgH3KKHOVj4utWp+UhnMJbulHheb4mjUcAwhmahRWa6
VOujw5H5SNz/0egwLX0tdHA114gk957EWW67c4cX8jJGKLhD+rcdqsq08p8kDi1L
93FcXmn/6pUCyziKrlA4b9v7LWIbxcceVOF34GfID5yHI9Y/QCB/IIDEgEw+OyQm
jgSubJrIqg0CAwEAAaNCMEAwDwYDVR0TAQH/BAUwAwEB/zAOBgNVHQ8BAf8EBAMC
AYYwHQYDVR0OBBYEFIQYzIU07LwMlJQuCFmcx7IQTgoIMA0GCSqGSIb3DQEBCwUA
A4IBAQCY8jdaQZChGsV2USggNiMOruYou6r4lK5IpDB/G/wkjUu0yKGX9rbxenDI
U5PMCCjjmCXPI6T53iHTfIUJrU6adTrCC2qJeHZERxhlbI1Bjjt/msv0tadQ1wUs
N+gDS63pYaACbvXy8MWy7Vu33PqUXHeeE6V/Uq2V8viTO96LXFvKWlJbYK8U90vv
o/ufQJVtMVT8QtPHRh8jrdkPSHCa2XV4cdFyQzR1bldZwgJcJmApzyMZFo6IQ6XU
5MsI+yMRQ+hDKXJioaldXgjUkK642M4UwtBV8ob2xJNDd2ZhwLnoQdeXeGADbkpy
rqXRfboQnoZsG4q5WTP468SQvvG5
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIEXjCCA0agAwIBAgITB3MSOAudZoijOx7Zv5zNpo4ODzANBgkqhkiG9w0BAQsF
ADA5MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6
b24gUm9vdCBDQSAxMB4XDTIyMDgyMzIyMjEyOFoXDTMwMDgyMzIyMjEyOFowPDEL
MAkGA1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEcMBoGA1UEAxMTQW1hem9uIFJT
QSAyMDQ4IE0wMTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAOtxLKnL
H4gokjIwr4pXD3i3NyWVVYesZ1yX0yLI2qIUZ2t88Gfa4gMqs1YSXca1R/lnCKeT
epWSGA+0+fkQNpp/L4C2T7oTTsddUx7g3ZYzByDTlrwS5HRQQqEFE3O1T5tEJP4t
f+28IoXsNiEzl3UGzicYgtzj2cWCB41eJgEmJmcf2T8TzzK6a614ZPyq/w4CPAff
nAV4coz96nW3AyiE2uhuB4zQUIXvgVSycW7sbWLvj5TDXunEpNCRwC4kkZjK7rol
jtT2cbb7W2s4Bkg3R42G3PLqBvt2N32e/0JOTViCk8/iccJ4sXqrS1uUN4iB5Nmv
JK74csVl+0u0UecCAwEAAaOCAVowggFWMBIGA1UdEwEB/wQIMAYBAf8CAQAwDgYD
VR0PAQH/BAQDAgGGMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAdBgNV
HQ4EFgQUgbgOY4qJEhjl+js7UJWf5uWQE4UwHwYDVR0jBBgwFoAUhBjMhTTsvAyU
lC4IWZzHshBOCggwewYIKwYBBQUHAQEEbzBtMC8GCCsGAQUFBzABhiNodHRwOi8v
b2NzcC5yb290Y2ExLmFtYXpvbnRydXN0LmNvbTA6BggrBgEFBQcwAoYuaHR0cDov
L2NydC5yb290Y2ExLmFtYXpvbnRydXN0LmNvbS9yb290Y2ExLmNlcjA/BgNVHR8E
ODA2MDSgMqAwhi5odHRwOi8vY3JsLnJvb3RjYTEuYW1hem9udHJ1c3QuY29tL3Jv
b3RjYTEuY3JsMBMGA1UdIAQMMAowCAYGZ4EMAQIBMA0GCSqGSIb3DQEBCwUAA4IB
AQCtAN4CBSMuBjJitGuxlBbkEUDeK/pZwTXv4KqPK0G50fOHOQAd8j21p0cMBgbG
kfMHVwLU7b0XwZCav0h1ogdPMN1KakK1DT0VwA/+hFvGPJnMV1Kx2G4S1ZaSk0uU
5QfoiYIIano01J5k4T2HapKQmmOhS/iPtuo00wW+IMLeBuKMn3OLn005hcrOGTad
hcmeyfhQP7Z+iKHvyoQGi1C0ClymHETx/chhQGDyYSWqB/THwnN15AwLQo0E5V9E
SJlbe4mBlqeInUsNYugExNf+tOiybcrswBy8OFsd34XOW3rjSUtsuafd9AWySa3h
xRRrwszrzX/WWGm6wyB+f7C4
-----END CERTIFICATE-----
-----BEGIN CERTIFICATE-----
MIIBtjCCAVugAwIBAgITBmyf1XSXNmY/Owua2eiedgPySjAKBggqhkjOPQQDAjA5
MQswCQYDVQQGEwJVUzEPMA0GA1UEChMGQW1hem9uMRkwFwYDVQQDExBBbWF6b24g
Um9vdCBDQSAzMB4XDTE1MDUyNjAwMDAwMFoXDTQwMDUyNjAwMDAwMFowOTELMAkG
A1UEBhMCVVMxDzANBgNVBAoTBkFtYXpvbjEZMBcGA1UEAxMQQW1hem9uIFJvb3Qg
Q0EgMzBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCmXp8ZBf8ANm+gBG1bG8lKl
ui2yEujSLtf6ycXYqm0fc4E7O5hrOXwzpcVOho6AF2hiRVd9RFgdszflZwjrZt6j
QjBAMA8GA1UdEwEB/wQFMAMBAf8wDgYDVR0PAQH/BAQDAgGGMB0GA1UdDgQWBBSr
ttvXBp43rDCGB5Fwx5zEGbF4wDAKBggqhkjOPQQDAgNJADBGAiEA4IWSoxe3jfkr
BqWTrBqYaGFy+uGh0PsceGCmQ5nFuMQCIQCcAu/xlJyzlvnrxir4tiz+OpAUFteM
YyRIHN8wfdVoOw==
-----END CERTIFICATE-----"""

# Client certificate presented to AWS IoT during the mutual-TLS handshake
# (cert_filepath in connect()).
DEVICE_CERT = """-----BEGIN CERTIFICATE-----
MIIDWTCCAkGgAwIBAgIUS6t3VblkVv6exR94j2C6OfQYuPUwDQYJKoZIhvcNAQEL
BQAwTTFLMEkGA1UECwxCQW1hem9uIFdlYiBTZXJ2aWNlcyBPPUFtYXpvbi5jb20g
SW5jLiBMPVNlYXR0bGUgU1Q9V2FzaGluZ3RvbiBDPVVTMB4XDTI2MDEyMDAwMjY0
NVoXDTQ5MTIzMTIzNTk1OVowHjEcMBoGA1UEAwwTQVdTIElvVCBDZXJ0aWZpY2F0
ZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAOOhXm+AqC0RFOVNeIeO
Ho8mDllysb8HVMBj1LOHj9HBxOlWHag+6OCGwhBF1oZt2m5kwTfOGolXjsVhLmkJ
4HFoLXvswOgy6aXkxkFRyoKQcRezqhyLWnZs1VoJ/xRBgRPyOIjulHpMxyNIXQOm
jW/bpvZwNwvKbTlu4q5FFSiEGRJeacHn6gxSQL2UzOU/E2H/HdpdUgWzlDi62ZMZ
i3L9N3bWs2Mu/qH8Dj6GytHtWiTeQvrYocrEeklfnvtDJNbpJ1y9KzjsnhY38vB+
PHlPom9Wt+7WoOL68giyCIo6CVoTP/b99oiizrCPQVtZMBq9uBnDAJWI1OXrsptz
uekCAwEAAaNgMF4wHwYDVR0jBBgwFoAUoXZmFNTGTo5QkP+XYrlrFKf7iTQwHQYD
VR0OBBYEFADXAPkaC2W5892X9PTERzrt1rf6MAwGA1UdEwEB/wQCMAAwDgYDVR0P
AQH/BAQDAgeAMA0GCSqGSIb3DQEBCwUAA4IBAQBfKPbUdCmhXiPLy3Ye19GlITwj
zADe8UGNOh74Rqh2E6liG0wskRLsniJODmHXqS71SHN3aG+7Jj29uDrkTOd9Ljz2
DrXxpnPXWElSEZoRUkB+GOd7BMog/4gARjJ51PnSZrG6aVYxspj5v/aE79FJxoDz
EusNtDHHp8ltn9oJa58eeinx1VMlS/tkiw8THLL+QL25KZRrEXZ1PZrn16r6YjQR
sdDB7ItNPCZH8TDzMAZRq7omvdGV+yp1lqBWl+g8YTTlwKFwibPHRGdxXyFCKibc
8TLWyTLiTTXJYevWz75h8MyUFSefqb/1j000TB460xHNyqf+2jwNzqgMwFJ+
-----END CERTIFICATE-----"""

# SECURITY NOTE(review): embedding a device private key in source means
# anyone who obtains this file can impersonate the device. Consider loading
# the key from a secrets store or environment variable instead, and rotate
# this key if the file has ever been shared.
DEVICE_PRIVATE_KEY = """-----BEGIN RSA PRIVATE KEY-----
MIIEogIBAAKCAQEA46Feb4CoLREU5U14h44ejyYOWXKxvwdUwGPUs4eP0cHE6VYd
qD7o4IbCEEXWhm3abmTBN84aiVeOxWEuaQngcWgte+zA6DLppeTGQVHKgpBxF7Oq
HItadmzVWgn/FEGBE/I4iO6UekzHI0hdA6aNb9um9nA3C8ptOW7irkUVKIQZEl5p
wefqDFJAvZTM5T8TYf8d2l1SBbOUOLrZkxmLcv03dtazYy7+ofwOPobK0e1aJN5C
+tihysR6SV+e+0Mk1uknXL0rOOyeFjfy8H48eU+ib1a37tag4vryCLIIijoJWhM/
9v32iKLOsI9BW1kwGr24GcMAlYjU5euym3O56QIDAQABAoIBAArtUiU0Hd6b9U7U
mzaWoezpPMMaeGHdLjpWEH3Vwheln5g75AyRiIhOr74sPBIT7+pgt82//RIeu+HJ
unqcWMPY5mYOhWwgFYs7IjE8heYinOswWrgoEF2i99Lsq+wDAPk+JWydV09PpDeT
X7OvKRb3ilpGJL5SJ1AcfJE1C8D4DzOmyQYXhmf1m3xET+2ZajQkSuFdh120kwuc
aOCCDuMurk9OimfOlZucthoozV01pHFNvX1j/kCK0vND1gmb0u8vrcL4R56ISt0n
zBPuXI/IPgnzlDcs8XZog2+GpBBFusZiOrJAciyuO1PxzwvbKsp6re5ix9MfnWFa
joEdHgECgYEA+YuTAWXtYD5HlLFEmErtgj0Y3GL4AvQmAu0vY0g4eWfRRWD47xYW
LIRtZ58JMPbseyDOt8yTv0Axgasaw2LgaSLnlr/erj4qkT+GkcjuhR4jXp4NLeqS
DuwC4HGjLC2CLbDnnk2jAx1mo6crmFR/IuVmF72FBVaBRiApyw5tmIkCgYEA6YSu
DvCvJW6r1gwBGErKA+qYiuSQs97xmHDbqASozzd5Sp7UWdY2Kagw7qYW8mN3N09l
vBNLlitxUu9qWKEvbIa2djHw/kv5wi7Y/UAZr16wGqB+lLJP6RY34MNRnsUOMO5B
g61Uu1esIww56qQMH/P2G6KnQtzGvmDcHkKt/mECgYAbM8PKmznNC/zxkIFRuJk7
YT2ciI1QhRy/pOtNf0noshB156Q7K8UvBSCH8yYlWEVfBqyCDKLubij3YZeBRP4B
d/SW0kOL6uro+mvI6IZXrHlejwwQvUk+pyuJG9oVrsBKCsa00vG6HTJbFJYD/PH+
89K1M8uUUWG2KztjDyeTQQKBgFQBPvxw6RD2NyrRNrOec/3kruuIPeX3FcJZFyvi
QTgKJMALUyjePq3Nq4yugyK/VWu4TEjUSCTFjAbhycgIdB7+dzUd0iwo+Lg6UiUI
ojTMQ13Qzrh/TO/05jUdDoenK+u+SiMsf4bFi08wXsViLlpwUOCwKquK8JQcFBNu
uVkBAoGAR9rDpYkKeo4txMLDDmT4spxgpi8Mzj+ic9sboPgbU6D4p7JplgknEbcO
yq4mnHIFzGYhSKu6XhOFL/dW8z7QF/JgDJN3Ds2KxHqVYulAfJ+jqxIG9/llT02Z
zfyNhCydiEoG9cXrA71VFFRYpqDQArYbyW+azwlNbsdH4vQGUBA=
-----END RSA PRIVATE KEY-----"""

# ============================================================================


def extract_device_id_from_filename(filename):
    """Extract device ID from Air Monitor CSV filename.
    Format: {DEVICE_ID}_{TIMESTAMP}.csv
    Example: AM-6028_1735689600.csv -> AM-6028
    Returns None when the name does not match the expected pattern.
    """
    stem = Path(filename).name
    if stem.endswith('.csv'):
        stem = stem[:-4]

    pieces = stem.split('_')
    if len(pieces) < 2:
        return None  # no {DEVICE_ID}_{TIMESTAMP} separator present

    candidate = pieces[0]
    return candidate if candidate.startswith(('AM-', 'AM_')) else None


def extract_device_id_from_csv_content(csv_path):
    """Extract device ID from the first row of CSV data.

    Skips blank lines and header rows; returns the first column of the
    first data row, or None if nothing usable is found.
    """
    try:
        with open(csv_path, 'r', encoding='utf-8') as handle:
            for raw in handle:
                candidate = raw.strip()
                # Ignore empty lines and anything that looks like a header.
                if not candidate or candidate.startswith(('device_id', 'Device')):
                    continue
                first_column = candidate.split(',')[0]
                if first_column:
                    return first_column
    except Exception as e:
        print(f"Warning: Could not extract device ID from CSV content: {e}")
    return None


def create_temp_cert_files():
    """Create temporary certificate files from the embedded PEM strings.

    Returns:
        Tuple of (ca_path, cert_path, key_path) as strings. The temp
        directory is NOT removed here; the caller owns its lifetime.
    """
    # mkdtemp creates the directory 0o700 (owner-only).
    temp_dir = Path(tempfile.mkdtemp(prefix='aws_iot_certs_'))

    ca_path = temp_dir / 'ca.crt'
    cert_path = temp_dir / 'client.crt'
    key_path = temp_dir / 'private.key'

    ca_path.write_text(AWS_CA_BUNDLE)
    cert_path.write_text(DEVICE_CERT)
    key_path.write_text(DEVICE_PRIVATE_KEY)
    # Fix: the key file previously inherited the process umask, which can
    # leave it group/world-readable. Restrict it to the owner.
    os.chmod(key_path, 0o600)

    return str(ca_path), str(cert_path), str(key_path)


# Default CSV header for files without headers
# NOTE(review): the column order here is assumed to match the device
# firmware's headerless CSV output — confirm against the firmware.
DEFAULT_CSV_HEADER = "device_id,timestamp,loopcounter,is_engineering,cpu_temp,battery_voltage,battery_gauge,battery_crate,bme_err,temperature,humidity,pressure,bmeAQI,windSpeed,winDir,voc_err,tvoc,tvoc_max,tvoc_ppb,mos_co2,pm_err,pm1.0,pm2.5,pm10.0,pmAQI,ir_err,ir_c1,ir_c1_max,ir_co2,ir_co2_max,pid_tvoc,pid_tvoc_max,mps_err,mps_c1,mps_c1_max,spec_err,h2s,h2s_max,ozone,ozone_max,so2,so2_max,no2,no2_max,nh3_err,co,mos_no2,mos_nh3,mos_nh3_max,errors,temperature_dx,humidity_dx,c1_signal,c1_base,c1_top,c1_dx,c1_found,h2s_signal,h2s_base,h2s_top,h2s_dx,h2s_found,voc_signal,voc_base,voc_top,voc_dx,voc_found,voc_nocomp_max,summa_triggered"



def csv_row_to_json(row, header_map):
    """Convert a CSV row to the JSON format used by the Air Monitor.

    Args:
        row: CSV row as a string
        header_map: Dictionary mapping field names to column indices

    Returns:
        Dictionary with device_id, timestamp (top level) and each sensor
        reading wrapped as {"value": number}.
    """
    columns = row.strip().split(',')
    message = {}

    # Sensor columns forwarded to the payload, in publish order. Each CSV
    # column name maps 1:1 onto the JSON key of the same name; columns in
    # the file but not listed here are deliberately dropped.
    sensor_fields = (
        'loopcounter', 'cpu_temp', 'battery_voltage', 'battery_gauge',
        'battery_crate', 'temperature', 'humidity', 'pressure', 'bmeAQI',
        'windSpeed', 'winDir', 'tvoc', 'tvoc_max', 'tvoc_ppb', 'mos_co2',
        'pm1.0', 'pm2.5', 'pm10.0', 'pmAQI', 'ir_c1', 'ir_c1_max',
        'ir_co2', 'ir_co2_max', 'pid_tvoc', 'pid_tvoc_max', 'mps_c1',
        'mps_c1_max', 'h2s', 'h2s_max', 'ozone', 'ozone_max', 'so2',
        'so2_max', 'no2', 'no2_max', 'mos_nh3', 'mos_nh3_max', 'errors',
        'voc_signal',
    )

    # Columns published as integers; everything else parses as float.
    int_fields = frozenset({'loopcounter', 'errors'})

    def cell(name):
        """Return the raw cell text for *name*, or None if absent/empty."""
        idx = header_map.get(name)
        if idx is not None and len(columns) > idx and columns[idx]:
            return columns[idx]
        return None

    try:
        # device_id and timestamp always sit at the top level, unwrapped.
        raw_id = cell('device_id')
        if raw_id is not None:
            message["device_id"] = raw_id

        raw_ts = cell('timestamp')
        if raw_ts is not None:
            try:
                message["timestamp"] = int(raw_ts)
            except (ValueError, TypeError) as e:
                print(f"Warning: Could not parse timestamp '{raw_ts}': {e}")
                message["timestamp"] = 0

        for name in sensor_fields:
            raw = cell(name)
            if raw is None:
                continue
            try:
                parse = int if name in int_fields else float
                message[name] = {"value": parse(raw)}
            except (ValueError, TypeError):
                pass  # Skip cells that do not parse as numbers

    except Exception as e:
        print(f"Warning: Error parsing CSV row: {e}")

    return message



class CSVToMQTTPublisher:
    """Publishes Air Monitor CSV files to AWS IoT Core over MQTT.

    Each CSV row is converted to the device's JSON payload format and
    published to ``AM/{device_id}/sensors`` using mutual-TLS MQTT.
    """

    def __init__(self, endpoint, cert_path, key_path, ca_path, client_id):
        """Store connection settings; no network activity happens here.

        Args:
            endpoint: AWS IoT ATS endpoint hostname.
            cert_path: Path to the client certificate (PEM).
            key_path: Path to the client private key (PEM).
            ca_path: Path to the CA bundle (PEM).
            client_id: MQTT client ID (normally the device ID).
        """
        self.endpoint = endpoint
        self.cert_path = cert_path
        self.key_path = key_path
        self.ca_path = ca_path
        self.client_id = client_id
        self.mqtt_connection = None
        self.connected = False

    def connect(self):
        """Open the mutual-TLS MQTT connection; blocks until connected."""
        print(f"Connecting to {self.endpoint} with client ID '{self.client_id}'...")
        self.mqtt_connection = mqtt_connection_builder.mtls_from_path(
            endpoint=self.endpoint,
            cert_filepath=self.cert_path,
            pri_key_filepath=self.key_path,
            ca_filepath=self.ca_path,
            client_id=self.client_id,
            clean_session=False,
            keep_alive_secs=30
        )
        connect_future = self.mqtt_connection.connect()
        connect_future.result()  # raises if the TLS/MQTT handshake fails
        self.connected = True
        print("✓ Connected successfully!")

    def disconnect(self):
        """Close the MQTT connection if one is open; safe to call repeatedly."""
        if self.mqtt_connection and self.connected:
            print("Disconnecting...")
            disconnect_future = self.mqtt_connection.disconnect()
            disconnect_future.result()
            self.connected = False

    def publish_message(self, topic, message, qos=1):
        """Publish one message and block until the broker acknowledges it.

        Args:
            topic: MQTT topic string.
            message: dict (JSON-encoded) or anything else (stringified).
            qos: MQTT QoS level (default 1 = at-least-once).

        Returns:
            The MQTT packet ID of the publish.

        Raises:
            RuntimeError: if connect() has not completed successfully.
        """
        if not self.connected:
            # Fix: was a bare `Exception`; RuntimeError is more specific and
            # is still caught by callers' `except Exception` handlers.
            raise RuntimeError("Not connected to MQTT broker")
        payload = json.dumps(message) if isinstance(message, dict) else str(message)
        publish_future, packet_id = self.mqtt_connection.publish(
            topic=topic, payload=payload, qos=mqtt.QoS(qos)
        )
        publish_future.result()
        return packet_id

    def _resolve_header(self, first_line):
        """Build a field-name -> column-index map from a file's first line.

        Returns:
            (header_map, consumed): `consumed` is True when *first_line*
            was itself a header row and must not be treated as data.
        """
        if first_line.startswith('device_id') or first_line.startswith('Device'):
            header_fields = first_line.split(',')
            header_map = {field.strip(): idx for idx, field in enumerate(header_fields)}
            print(f"  Parsed header with {len(header_map)} fields")
            if 'timestamp' in header_map:
                print(f"  Found 'timestamp' at index {header_map['timestamp']}")
            return header_map, True
        # First line is data, not a header - fall back to the default layout.
        print(f"  No header found, using default CSV header")
        header_fields = DEFAULT_CSV_HEADER.split(',')
        header_map = {field.strip(): idx for idx, field in enumerate(header_fields)}
        print(f"  Using default header with {len(header_map)} fields")
        return header_map, False

    def _print_dry_run_row(self, row_count, json_message):
        """Echo a parsed row to stdout in dry-run mode (first 3 rows only)."""
        if row_count <= 3:
            print(f"\n  Row {row_count}:")
            print(f"    Timestamp: {json_message.get('timestamp', 'N/A')}")
            print(f"    Device ID: {json_message.get('device_id', 'N/A')}")
            print(f"    Fields: {len(json_message)} total")
            # Show a few sample sensor values
            sample_fields = ['temperature', 'humidity', 'pressure', 'loopcounter']
            for field in sample_fields:
                if field in json_message:
                    print(f"    {field}: {json_message[field]}")
        elif row_count == 4:
            print(f"\n  ... (showing first 3 rows only)")

    def process_csv_file(self, csv_path, delay_ms=100, move_after_publish=True, dry_run=False):
        """Publish (or dry-run parse) every row of one CSV file.

        Args:
            csv_path: Path to the CSV file.
            delay_ms: Pause between messages, in milliseconds.
            move_after_publish: Move the file into a sibling 'published/'
                directory once every row published successfully.
            dry_run: Parse and print instead of publishing.

        Returns:
            Number of rows parsed (dry run) or published.
        """
        csv_path = Path(csv_path)
        if not csv_path.exists():
            print(f"Error: File not found: {csv_path}")
            return 0

        # Prefer the per-file device ID (filename first, then file content);
        # fall back to the connection's client ID.
        device_id = (extract_device_id_from_filename(str(csv_path))
                     or extract_device_id_from_csv_content(str(csv_path))
                     or self.client_id)

        print(f"\nProcessing: {csv_path.name}")
        topic = f"AM/{device_id}/sensors"
        print(f"Publishing to topic: {topic}")
        if dry_run:
            print("  DRY RUN MODE - Not actually publishing to AWS IoT")

        row_count = 0
        publish_success = True
        header_map = None

        with open(csv_path, 'r', encoding='utf-8') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue

                if line_num == 1:
                    # Fix: the old code stashed a headerless first line in a
                    # `first_data_line` variable and immediately reassigned it
                    # to itself - pure dead code. The line is processed
                    # directly below instead.
                    header_map, consumed = self._resolve_header(line)
                    if consumed:
                        continue  # first line was a header, not data

                if header_map is None:
                    # The first physical line was blank, so the header was
                    # never resolved; bail out rather than guess the format.
                    print(f"  Error: Could not determine CSV format")
                    return 0

                json_message = csv_row_to_json(line, header_map)

                if dry_run:
                    row_count += 1
                    self._print_dry_run_row(row_count, json_message)
                else:
                    try:
                        packet_id = self.publish_message(topic, json_message)
                        row_count += 1
                        timestamp = json_message.get('timestamp', 'N/A')
                        print(f"  Published row {row_count} (timestamp: {timestamp}, packet: {packet_id})")
                        if delay_ms > 0:
                            time.sleep(delay_ms / 1000.0)
                    except Exception as e:
                        print(f"  Error publishing row {row_count + 1}: {e}")
                        publish_success = False

        print(f"\nCompleted: {row_count} rows {'parsed' if dry_run else 'published'} from {csv_path.name}")

        if move_after_publish and publish_success and row_count > 0 and not dry_run:
            try:
                published_dir = csv_path.parent / 'published'
                published_dir.mkdir(exist_ok=True)
                destination = published_dir / csv_path.name
                csv_path.rename(destination)
                print(f"✓ Moved {csv_path.name} to published/")
            except Exception as e:
                print(f"Warning: Could not move file: {e}")

        # Add 2 second delay after completing each file to avoid AWS IoT rate limits
        if not dry_run and row_count > 0:
            print(f"  Waiting 2 seconds before next file...")
            time.sleep(2)

        return row_count



def main():
    """CLI entry point: parse arguments, resolve the device ID, publish.

    Exits with status 1 when no CSV input is found, no device ID can be
    determined, or an unhandled error occurs during publishing.
    """
    parser = argparse.ArgumentParser(
        description="Publish Air Monitor CSV data to AWS IoT MQTT (Standalone)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  python csv_to_mqtt_standalone.py --csv AM-6028_1735689600.csv
  python csv_to_mqtt_standalone.py --dir ./csv_files
  python csv_to_mqtt_standalone.py --csv data.csv --endpoint my-endpoint.iot.us-west-2.amazonaws.com
        """
    )

    input_group = parser.add_mutually_exclusive_group(required=True)
    input_group.add_argument('--csv', help='Single CSV file to process')
    input_group.add_argument('--dir', help='Directory containing CSV files')

    parser.add_argument('--endpoint', default='a1njj292w2vjt1-ats.iot.us-west-2.amazonaws.com',
                       help='AWS IoT endpoint (default: a1njj292w2vjt1-ats.iot.us-west-2.amazonaws.com)')
    parser.add_argument('--delay', type=int, default=100,
                       help='Delay between messages in ms (default: 100)')
    parser.add_argument('--no-move', action='store_true',
                       help='Do not move files to published/ folder after publishing')
    parser.add_argument('--dry-run', action='store_true',
                       help='Parse CSV and show output without connecting to AWS IoT')

    args = parser.parse_args()

    # Determine the first CSV file (used only to derive the device ID).
    first_csv_file = args.csv if args.csv else None
    if not first_csv_file and args.dir:
        directory = Path(args.dir)
        if directory.exists():
            csv_files = sorted(directory.glob('*.csv'))
            if csv_files:
                first_csv_file = str(csv_files[0])

    if not first_csv_file:
        print("Error: No CSV files found to process")
        sys.exit(1)

    # Extract device ID: filename first, then CSV content as a fallback.
    print(f"Extracting device ID from: {Path(first_csv_file).name}")
    device_id = extract_device_id_from_filename(first_csv_file)

    if not device_id:
        print("  Device ID not found in filename, checking CSV content...")
        device_id = extract_device_id_from_csv_content(first_csv_file)

    if not device_id:
        print("Error: Could not extract device ID from CSV file")
        print("  Expected filename format: {DEVICE_ID}_{TIMESTAMP}.csv")
        sys.exit(1)

    print(f"✓ Device ID: {device_id}")
    print(f"✓ Endpoint: {args.endpoint}")

    # Materialize the embedded certificates as temp files for the TLS stack.
    print("Creating temporary certificate files...")
    ca_path, cert_path, key_path = create_temp_cert_files()
    cert_dir = Path(ca_path).parent  # removed in the finally block below
    print("✓ Certificates ready")

    publisher = CSVToMQTTPublisher(
        endpoint=args.endpoint,
        cert_path=cert_path,
        key_path=key_path,
        ca_path=ca_path,
        client_id=device_id
    )

    try:
        if not args.dry_run:
            publisher.connect()
        move_after_publish = not args.no_move

        if args.csv:
            total_rows = publisher.process_csv_file(args.csv, args.delay, move_after_publish, args.dry_run)
        else:
            # Process every CSV in the directory, in sorted order.
            directory = Path(args.dir)
            csv_files = sorted(directory.glob('*.csv'))
            total_rows = 0
            for csv_file in csv_files:
                rows = publisher.process_csv_file(csv_file, args.delay, move_after_publish, args.dry_run)
                total_rows += rows

        print(f"\n✓ Total rows {'parsed' if args.dry_run else 'published'}: {total_rows}")
        if not args.dry_run:
            print(f"✓ All messages published to topic: AM/{device_id}/sensors")
            if move_after_publish:
                print(f"✓ Successfully published files moved to 'published/' folder")

    except KeyboardInterrupt:
        print("\n\nInterrupted by user")
    except Exception as e:
        print(f"\nError: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)
    finally:
        # Fix: the temp directory holding the device private key was never
        # removed; delete it so the key does not linger on disk.
        shutil.rmtree(cert_dir, ignore_errors=True)
        if not args.dry_run:
            publisher.disconnect()

# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    main()
