Skip to content

Useful SQL queries for error detection

1 Find Gap in statistic

for measurement entities

-- Check for gaps in statistics - SQLite version
-- Only shows rows with gaps (WHERE gap_seconds > 3600)
-- Shows gap size in hours for easier reading
-- Distinguishes between regular gaps (>1h) and large gaps (>2h)
-- Sorts by largest gaps first (most problematic)
WITH gap_analysis AS (
  SELECT 
    datetime(start_ts, 'unixepoch') as period,
    mean,
    start_ts,
    LAG(start_ts) OVER (ORDER BY start_ts) as previous_ts,
    start_ts - LAG(start_ts) OVER (ORDER BY start_ts) as gap_seconds
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.temperature_entree')
)
SELECT 
  period,
  mean,
  gap_seconds / 3600.0 as gap_hours,
  CASE 
    WHEN gap_seconds > 7200 THEN '⚠️ LARGE GAP (>2 hours)'
    WHEN gap_seconds > 3600 THEN '⚠️ GAP DETECTED'
  END as gap_severity
FROM gap_analysis
WHERE gap_seconds > 3600  -- Only show gaps > 1 hour
ORDER BY gap_seconds DESC  -- Show largest gaps first
LIMIT 50;
-- Check for gaps in statistics same as above - MariaDB version
WITH gap_analysis AS (
  SELECT 
    FROM_UNIXTIME(start_ts) as period,
    mean,
    start_ts,
    LAG(start_ts) OVER (ORDER BY start_ts) as previous_ts,
    start_ts - LAG(start_ts) OVER (ORDER BY start_ts) as gap_seconds
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.temperature_entree')
)
SELECT 
  period,
  mean,
  gap_seconds / 3600.0 as gap_hours,
  CASE 
    WHEN gap_seconds > 7200 THEN '⚠️ LARGE GAP (>2 hours)'
    WHEN gap_seconds > 3600 THEN '⚠️ GAP DETECTED'
  END as gap_severity
FROM gap_analysis
WHERE gap_seconds > 3600  -- Only show gaps > 1 hour
ORDER BY gap_seconds DESC  -- Show largest gaps first
LIMIT 50;

for counter entities

Only show gaps

SQLite version

-- Check for gaps in counter statistics (only show gaps)
WITH gap_analysis AS (
  SELECT 
    datetime(start_ts, 'unixepoch', 'localtime') as period,
    state,
    sum,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption,
    start_ts,
    LAG(start_ts) OVER (ORDER BY start_ts) as previous_ts,
    start_ts - LAG(start_ts) OVER (ORDER BY start_ts) as gap_seconds
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
)
SELECT 
  period,
  state as counter_value,
  sum as cumulative_sum,
  consumption as period_consumption,
  gap_seconds / 3600.0 as gap_hours,
  CASE 
    WHEN gap_seconds > 7200 THEN '⚠️ LARGE GAP (>2 hours)'
    WHEN gap_seconds > 3600 THEN '⚠️ GAP DETECTED'
  END as gap_severity,
  '❌ Missing consumption data for this period' as impact
FROM gap_analysis
WHERE gap_seconds > 3600  -- Only show gaps > 1 hour
ORDER BY gap_seconds DESC
LIMIT 50;

MariaDB version

-- Check for gaps in counter statistics (only show gaps)
WITH gap_analysis AS (
  SELECT 
    FROM_UNIXTIME(start_ts) as period,
    state,
    sum,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption,
    start_ts,
    LAG(start_ts) OVER (ORDER BY start_ts) as previous_ts,
    start_ts - LAG(start_ts) OVER (ORDER BY start_ts) as gap_seconds
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
)
SELECT 
  period,
  state as counter_value,
  sum as cumulative_sum,
  consumption as period_consumption,
  gap_seconds / 3600.0 as gap_hours,
  CASE 
    WHEN gap_seconds > 7200 THEN '⚠️ LARGE GAP (>2 hours)'
    WHEN gap_seconds > 3600 THEN '⚠️ GAP DETECTED'
  END as gap_severity,
  '❌ Missing consumption data for this period' as impact
FROM gap_analysis
WHERE gap_seconds > 3600  -- Only show gaps > 1 hour
ORDER BY gap_seconds DESC
LIMIT 50;
-- Show gaps with before/after context - SQLite
SELECT 
  datetime(s1.start_ts, 'unixepoch', 'localtime') as last_record_before_gap,
  s1.state as state_before,
  s1.sum as sum_before,
  '⚠️ --- GAP ---' as gap_indicator,
  ROUND((s2.start_ts - s1.start_ts) / 3600.0, 1) as gap_hours,
  datetime(s2.start_ts, 'unixepoch', 'localtime') as first_record_after_gap,
  s2.state as state_after,
  s2.sum as sum_after,
  s2.sum - s1.sum as sum_change_across_gap,
  CASE 
    WHEN s2.sum = s1.sum THEN '❌ No consumption recorded (sum unchanged)'
    WHEN s2.state < s1.state THEN '⚠️ Counter may have reset during gap'
    ELSE '⚠️ Consumption during gap unknown'
  END as gap_impact
FROM statistics s1
JOIN statistics s2 ON s2.metadata_id = s1.metadata_id 
  AND s2.start_ts = (
    SELECT MIN(start_ts) 
    FROM statistics 
    WHERE metadata_id = s1.metadata_id 
    AND start_ts > s1.start_ts
  )
WHERE s1.metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.energy_total')
  AND (s2.start_ts - s1.start_ts) > 3600
ORDER BY gap_hours DESC
LIMIT 50;

Show gaps with before/after context

MariaDB version

-- Show gaps with before/after context - MariaDB
SELECT 
  FROM_UNIXTIME(s1.start_ts) as last_record_before_gap,
  s1.state as state_before,
  s1.sum as sum_before,
  '⚠️ --- GAP ---' as gap_indicator,
  ROUND((s2.start_ts - s1.start_ts) / 3600.0, 1) as gap_hours,
  FROM_UNIXTIME(s2.start_ts) as first_record_after_gap,
  s2.state as state_after,
  s2.sum as sum_after,
  s2.sum - s1.sum as sum_change_across_gap,
  CASE 
    WHEN s2.sum = s1.sum THEN '❌ No consumption recorded (sum unchanged)'
    WHEN s2.state < s1.state THEN '⚠️ Counter may have reset during gap'
    ELSE '⚠️ Consumption during gap unknown'
  END as gap_impact
FROM statistics s1
JOIN statistics s2 ON s2.metadata_id = s1.metadata_id 
  AND s2.start_ts = (
    SELECT MIN(start_ts) 
    FROM statistics 
    WHERE metadata_id = s1.metadata_id 
    AND start_ts > s1.start_ts
  )
WHERE s1.metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
  AND (s2.start_ts - s1.start_ts) > 3600
ORDER BY gap_hours DESC
LIMIT 50;

2 Invalid Data / Spikes

for measurement

-- Find measurement outliers (values > 3 standard deviations from mean)
WITH stats AS (
  SELECT 
    AVG(mean) as avg_mean,
    AVG(mean * mean) - AVG(mean) * AVG(mean) as variance
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.e3_vitocal_boiler_supply_temperature')
)
SELECT 
  datetime(start_ts, 'unixepoch', 'localtime') as period,
  mean,
  min,
  max,
  CASE 
    WHEN ABS(mean - (SELECT avg_mean FROM stats)) > 3 * SQRT((SELECT variance FROM stats))
    THEN '⚠️ OUTLIER'
    ELSE 'OK'
  END as outlier_check
FROM statistics
WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.temperature')
ORDER BY start_ts DESC;

for counters

SQLite version

-- Find invalid spikes in counter statistics
WITH counter_analysis AS (
  SELECT 
    datetime(start_ts, 'unixepoch', 'localtime') as period,
    state,
    sum,
    LAG(state) OVER (ORDER BY start_ts) as previous_state,
    LAG(sum) OVER (ORDER BY start_ts) as previous_sum,
    state - LAG(state) OVER (ORDER BY start_ts) as state_change,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption,
    -- Calculate average consumption over last 24 periods
    AVG(sum - LAG(sum) OVER (ORDER BY start_ts)) OVER (
      ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING
    ) as avg_24h_consumption,
    -- Calculate standard deviation
    (sum - LAG(sum) OVER (ORDER BY start_ts)) as hourly_consumption
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
)
SELECT 
  period,
  state as counter_value,
  sum as cumulative_sum,
  consumption as hourly_consumption,
  ROUND(avg_24h_consumption, 2) as avg_24h,
  ROUND(consumption / NULLIF(avg_24h_consumption, 0), 1) as spike_multiplier,
  CASE 
    WHEN consumption IS NULL THEN 'First record'
    WHEN avg_24h_consumption = 0 THEN '⚠️ No baseline yet'
    WHEN consumption < 0 THEN '❌ NEGATIVE consumption (impossible!)'
    WHEN consumption > avg_24h_consumption * 10 THEN '❌ EXTREME SPIKE (>10x normal)'
    WHEN consumption > avg_24h_consumption * 5 THEN '⚠️ LARGE SPIKE (>5x normal)'
    WHEN consumption > avg_24h_consumption * 3 THEN '⚠️ SPIKE DETECTED (>3x normal)'
    WHEN ABS(state_change) < 0.001 AND consumption > 0 THEN '⚠️ Sum increased but state unchanged'
    ELSE 'OK'
  END as spike_status,
  CASE 
    WHEN consumption < 0 THEN 'Counter decreased (hardware error or missed reset)'
    WHEN consumption > avg_24h_consumption * 10 THEN 'Unrealistic consumption spike'
    WHEN ABS(state_change) < 0.001 AND consumption > 0 THEN 'Sum/state mismatch'
    ELSE ''
  END as issue_description
FROM counter_analysis
WHERE consumption IS NOT NULL
  AND avg_24h_consumption > 0
  AND (
    consumption < 0 
    OR consumption > avg_24h_consumption * 3
    OR (ABS(state_change) < 0.001 AND consumption > 0)
  )
ORDER BY ABS(consumption / NULLIF(avg_24h_consumption, 1)) DESC
LIMIT 50;

MariaDB version

-- Find invalid spikes in counter statistics
WITH consumption_calc AS (
  -- Step 1: Calculate consumption for each period
  SELECT 
    FROM_UNIXTIME(start_ts) as period,
    start_ts,
    state,
    sum,
    LAG(state) OVER (ORDER BY start_ts) as previous_state,
    LAG(sum) OVER (ORDER BY start_ts) as previous_sum,
    state - LAG(state) OVER (ORDER BY start_ts) as state_change,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
),
counter_analysis AS (
  -- Step 2: Calculate average consumption over last 24 periods
  SELECT 
    period,
    state,
    sum,
    previous_state,
    previous_sum,
    state_change,
    consumption,
    AVG(consumption) OVER (
      ORDER BY start_ts
      ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING
    ) as avg_24h_consumption
  FROM consumption_calc
)
SELECT 
  period,
  state as counter_value,
  sum as cumulative_sum,
  ROUND(consumption, 2) as hourly_consumption,
  ROUND(avg_24h_consumption, 2) as avg_24h,
  ROUND(consumption / NULLIF(avg_24h_consumption, 0), 1) as spike_multiplier,
  CASE 
    WHEN consumption IS NULL THEN 'First record'
    WHEN avg_24h_consumption = 0 OR avg_24h_consumption IS NULL THEN '⚠️ No baseline yet'
    WHEN consumption < 0 THEN '❌ NEGATIVE consumption (impossible!)'
    WHEN consumption > avg_24h_consumption * 10 THEN '❌ EXTREME SPIKE (>10x normal)'
    WHEN consumption > avg_24h_consumption * 5 THEN '⚠️ LARGE SPIKE (>5x normal)'
    WHEN consumption > avg_24h_consumption * 3 THEN '⚠️ SPIKE DETECTED (>3x normal)'
    WHEN ABS(state_change) < 0.001 AND consumption > 0 THEN '⚠️ Sum increased but state unchanged'
    ELSE 'OK'
  END as spike_status,
  CASE 
    WHEN consumption < 0 THEN 'Counter decreased (hardware error or missed reset)'
    WHEN consumption > avg_24h_consumption * 10 THEN 'Unrealistic consumption spike'
    WHEN ABS(state_change) < 0.001 AND consumption > 0 THEN 'Sum/state mismatch'
    ELSE ''
  END as issue_description
FROM counter_analysis
WHERE consumption IS NOT NULL
  AND avg_24h_consumption > 0
  AND (
    consumption < 0 
    OR consumption > avg_24h_consumption * 3
    OR (ABS(state_change) < 0.001 AND consumption > 0)
  )
ORDER BY ABS(consumption / NULLIF(avg_24h_consumption, 1)) DESC
LIMIT 50;
-- Detect spike + recovery pattern (glitch signature) - SQLite (corrected)
WITH consumption_calc AS (
  -- Step 1: Calculate consumption and next consumption
  SELECT 
    datetime(start_ts, 'unixepoch', 'localtime') as period,
    start_ts,
    state,
    sum,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption,
    LEAD(sum) OVER (ORDER BY start_ts) - sum as next_consumption
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
),
consumption_with_avg AS (
  -- Step 2: Calculate rolling average
  SELECT 
    period,
    state,
    sum,
    consumption,
    next_consumption,
    AVG(consumption) OVER (
      ORDER BY start_ts
      ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING
    ) as avg_consumption
  FROM consumption_calc
)
SELECT 
  period,
  state,
  sum,
  ROUND(consumption, 2) as this_period_consumption,
  ROUND(next_consumption, 2) as next_period_consumption,
  ROUND(avg_consumption, 2) as baseline_avg,
  CASE 
    WHEN consumption > avg_consumption * 5 
         AND next_consumption < 0 
         AND ABS(next_consumption) > avg_consumption * 3
    THEN '❌ SPIKE + DROP GLITCH DETECTED'
    WHEN consumption > avg_consumption * 5 
         AND next_consumption < avg_consumption * 0.2
    THEN '⚠️ SPIKE followed by very low consumption'
    WHEN consumption < 0 
         AND next_consumption > avg_consumption * 3
    THEN '⚠️ DROP followed by spike (inverse glitch)'
    ELSE 'Potential issue'
  END as glitch_pattern,
  'Data integrity compromised - manual correction may be needed' as recommendation
FROM consumption_with_avg
WHERE avg_consumption > 0
  AND (
    -- Spike followed by negative consumption
    (consumption > avg_consumption * 5 AND next_consumption < 0) OR
    -- Negative consumption followed by spike
    (consumption < 0 AND next_consumption > avg_consumption * 3) OR
    -- Extreme spike followed by near-zero
    (consumption > avg_consumption * 10 AND next_consumption < avg_consumption * 0.2)
  )
ORDER BY period DESC
LIMIT 50;
-- Detect spike + recovery pattern (glitch signature) - MariaDB (corrected)
WITH consumption_calc AS (
  -- Step 1: Calculate consumption and next consumption
  SELECT 
    FROM_UNIXTIME(start_ts) as period,
    start_ts,
    state,
    sum,
    sum - LAG(sum) OVER (ORDER BY start_ts) as consumption,
    LEAD(sum) OVER (ORDER BY start_ts) - sum as next_consumption
  FROM statistics
  WHERE metadata_id = (SELECT id FROM statistics_meta WHERE statistic_id = 'sensor.east')
),
consumption_with_avg AS (
  -- Step 2: Calculate rolling average
  SELECT 
    period,
    state,
    sum,
    consumption,
    next_consumption,
    AVG(consumption) OVER (
      ORDER BY start_ts
      ROWS BETWEEN 24 PRECEDING AND 1 PRECEDING
    ) as avg_consumption
  FROM consumption_calc
)
SELECT 
  period,
  state,
  sum,
  ROUND(consumption, 2) as this_period_consumption,
  ROUND(next_consumption, 2) as next_period_consumption,
  ROUND(avg_consumption, 2) as baseline_avg,
  CASE 
    WHEN consumption > avg_consumption * 5 
         AND next_consumption < 0 
         AND ABS(next_consumption) > avg_consumption * 3
    THEN '❌ SPIKE + DROP GLITCH DETECTED'
    WHEN consumption > avg_consumption * 5 
         AND next_consumption < avg_consumption * 0.2
    THEN '⚠️ SPIKE followed by very low consumption'
    WHEN consumption < 0 
         AND next_consumption > avg_consumption * 3
    THEN '⚠️ DROP followed by spike (inverse glitch)'
    ELSE 'Potential issue'
  END as glitch_pattern,
  'Data integrity compromised - manual correction may be needed' as recommendation
FROM consumption_with_avg
WHERE avg_consumption > 0
  AND (
    -- Spike followed by negative consumption
    (consumption > avg_consumption * 5 AND next_consumption < 0) OR
    -- Negative consumption followed by spike
    (consumption < 0 AND next_consumption > avg_consumption * 3) OR
    -- Extreme spike followed by near-zero
    (consumption > avg_consumption * 10 AND next_consumption < avg_consumption * 0.2)
  )
ORDER BY period DESC
LIMIT 50;