Charts: Box plot

import io.data2viz.charts.*
import io.data2viz.charts.core.*
import io.data2viz.charts.core.Labelled
import io.data2viz.charts.dimension.*
import io.data2viz.charts.chart.*
import io.data2viz.charts.chart.mark.*
import io.data2viz.charts.viz.*
import io.data2viz.charts.layout.*
import io.data2viz.geom.*
import io.data2viz.color.*
import io.data2viz.random.RandomDistribution
import io.data2viz.timeFormat.*
import io.data2viz.timeFormat.TimeLocale
import io.data2viz.random.RandomGenerator
import kotlinx.datetime.*
import io.data2viz.math.*

/**
 * Entry point: builds a 450 x 300 viz container and draws one box per entry of [badData].
 */
fun main() {
    val width = 450.0
    val height = 300.0

    val vc = newVizContainer().apply {
        size = Size(width, height)
    }

    vc.chart(badData) {

        config {
            events {
                zoomMode = ZoomMode.XY
                panMode = PanMode.XY
            }
            tooltip {
                fontSize = config.tooltip.fontSize + 2.0
            }
            cursor {
                show = true
            }
        }

        // One discrete category per sample (its name); the quantitative dimension carries the median.
        val id = discrete({ domain.name })
        val candleValues = quantitative({ domain.median }) {
            name = "Statistical values"
        }

        // The remaining statistics are read straight from the pre-computed BoxAndWhiskerData.
        boxPlot(id, candleValues) {
            minimum = { domain.minimum }
            maximum = { domain.maximum }
            upperQuartile = { domain.upperQuartile }
            lowerQuartile = { domain.lowerQuartile }
            outliers = { domain.outliers }

            x {
                bandwidthRatio = 50.pct
            }
            y {
                start = 80.0
                end = 120.0
            }
        }
    }
}

/**
 * A raw measurement: the [index] of the series it belongs to, its [value] and its [timeStamp].
 */
data class Record(val index: Int, val value: Double, val timeStamp: Instant)

/**
 * Pre-computed statistics describing one box of a "Box And Whisker" plot.
 *
 * [minimum] and [maximum] are the whisker ends, [outliers] the values drawn individually.
 * The [label] required by [Labelled] is the sample [name].
 */
public data class BoxAndWhiskerData(
    val name: String,
    val minimum: Double,
    val maximum: Double,
    val median: Double,
    val lowerQuartile: Double,
    val upperQuartile: Double,
    val mean: Double,
    val outliers: List<Double>
) : Labelled {
    override val label: String = name
}

/**
 * Generates [numSamples] records whose values are drawn from [rng].
 *
 * Every run of 20 consecutive records shares the same series index, and timestamps are spaced
 * 8632 ms apart starting at epoch-millis 1611150127144.
 */
fun randomSamples(numSamples: Int, rng: RandomGenerator): List<Record> =
    List(numSamples) { i ->
        Record(i / 20, rng(), Instant.fromEpochMilliseconds(1611150127144L + (i * 8632L)))
    }

// NOTE(review): presumably 42 is the seed and (100.0, 5.0) are the mean and standard deviation of
// the normal distribution - confirm against the data2viz random module.
val rng = io.data2viz.random.RandomDistribution(42).normal(100.0, 5.0)
val data = randomSamples(200, rng)

// Values of each series, grouped by record index.
val recordsBySeries: List<List<Double>> = data.groupBy { it.index }.values.map { it.map { it.value } }

// Dataset displayed by the chart in main().
val badData = analyseData()

/**
 * Builds one [BoxAndWhiskerData] per inner list of [values]; the list position is used to name
 * the sample ("Sample #0", "Sample #1", ...).
 *
 * @throws IllegalArgumentException if one of the inner lists is empty.
 */
public fun boxAndWhiskers(values: List<List<Double>>): List<BoxAndWhiskerData> =
    values.mapIndexed { index, data -> boxAndWhiskers(index, data) }

/**
 * Computes the box-and-whisker statistics of a single sample.
 *
 * Quartiles are the medians of the lower and upper halves of the sorted values. Values lying
 * outside Tukey's fences (Q1 - 1.5 * IQR, Q3 + 1.5 * IQR) are reported as outliers; the whiskers
 * (minimum / maximum) are the most extreme values still inside the fences.
 */
private fun boxAndWhiskers(index: Int, values: List<Double>): BoxAndWhiskerData {
    require(values.isNotEmpty()) { "Cannot compute box-and-whisker statistics for empty sample #$index" }

    val orderedValues = values.sorted()
    val median = median(orderedValues)

    // A single-value sample has empty halves: its quartiles collapse onto the median.
    val lowerQuartile = if (median.lowerList.isEmpty()) median.medianValue else median(median.lowerList).medianValue
    val upperQuartile = if (median.upperList.isEmpty()) median.medianValue else median(median.upperList).medianValue

    val iqr = upperQuartile - lowerQuartile
    val outlierThreshold = iqr * 1.5
    // The fences are anchored on the quartiles, not on the median.
    val outlierThresholdMin = lowerQuartile - outlierThreshold
    val outlierThresholdMax = upperQuartile + outlierThreshold

    val outliers = orderedValues.filter { it < outlierThresholdMin || it > outlierThresholdMax }

    // Searching the whole sorted sample always succeeds for finite values: at least one value is
    // >= Q1 and at least one is <= Q3, and both are inside the fences because IQR >= 0.
    val minimum = orderedValues.first { it >= outlierThresholdMin }
    val maximum = orderedValues.last { it <= outlierThresholdMax }

    val mean = orderedValues.average()

    return BoxAndWhiskerData(
        "Sample #$index",
        minimum,
        maximum,
        median.medianValue,
        lowerQuartile,
        upperQuartile,
        mean,
        outliers
    )
}

/**
 * Result of a median computation: the [medianValue] plus the sorted values located below
 * ([lowerList]) and above ([upperList]) the median position.
 */
public data class Median(
    val medianValue: Double,
    val lowerList: List<Double>,
    val upperList: List<Double>
)

/**
 * Computes the median of [values] and splits the sorted values into two halves of equal size.
 *
 * For an odd number of values the middle element is the median and belongs to neither half.
 * For an even number of values the median is the average of the two central elements and each
 * half holds exactly size / 2 values.
 *
 * @throws IllegalArgumentException if [values] is empty.
 */
private fun median(values: List<Double>): Median {
    require(values.isNotEmpty()) { "Cannot compute the median of an empty list" }

    val orderedValues = values.sorted()
    val size = orderedValues.size
    val mid = size / 2

    return if (size % 2 == 1) {
        Median(
            orderedValues[mid],
            orderedValues.subList(0, mid),
            orderedValues.subList(mid + 1, size)
        )
    } else {
        Median(
            // The two central elements of an even-sized list are at mid - 1 and mid.
            (orderedValues[mid - 1] + orderedValues[mid]) / 2.0,
            orderedValues.subList(0, mid),
            orderedValues.subList(mid, size)
        )
    }
}

/**
 * Generates 200 random records, groups their values by series index and computes the
 * box-and-whisker statistics of each series.
 */
fun analyseData(): List<BoxAndWhiskerData> {
    val generator = RandomDistribution(42).normal(100.0, 5.0)
    val samples = randomSamples(200, generator)
    val valuesBySeries: List<List<Double>> = samples.groupBy { it.index }.values.map { it.map { it.value } }
    return boxAndWhiskers(valuesBySeries)
}
pierre avatar

Sketch created by

pierre

A simple box plot. Note that I start from a collection of "Record" and compute everything needed to build a collection of "BoxAndWhiskerData" containing all the statistical values (mean, max, outliers...), so I can use it directly in my chart. TL;DR: the Charts.kt box plot does not compute the statistical data itself; you need to compute it beforehand and store it in a convenient object to use in your chart.

comments