Intro to Probability Distributions
draft
I don't have a description yet.
Published
January 30, 2025
The Normal Density Curve
This density indicates that 65% of trees are between 5 and 10 meters tall.
The Forest with Normally Distributed Tree Heights
This forest contains 15 large trees (those taller than 40 meters). If tree height follows a normal distribution, the probability of a tree being taller than 40 meters is around 15.56%.
d3 = require("d3@7");
// import { aq, op } from '@uwdata/arquero';
/* Constants -----------------------------------------------------------------*/
// Observable treats a bare `{ ... }` on the right-hand side as a block, not an
// object literal, so the literal is wrapped in parentheses here. See:
// https://observablehq.com/documentation/cells/observable-javascript#statements-need-curly-braces-and-return-or-yield
margin = ({ left: 40, right: 30, top: 30, bottom: 30 });
// Outer size of the plot (used for the SVG viewBox).
plot_width = 500;
plot_height = 400;
// Inner drawing area: the outer size with the margins on both sides removed.
width = plot_width - (margin.left + margin.right);
height = plot_height - (margin.top + margin.bottom);
// Fill/stroke colour of the density curve.
green = "#59B85E";
/* SVG Initialization --------------------------------------------------------*/
// Appends a responsive <svg> to #density-container. Note that `svg` ends up
// referring to the inner <g> "panel" (shifted by the left/top margins), not to
// the <svg> element itself, because the chain ends on the appended group.
svg = d3.select("#density-container")
  .append("svg")
    .attr("viewBox", [0, 0, plot_width, plot_height])
    .attr("style", "max-width: 100%; height: auto;")
  .append("g")
    .attr("transform", `translate(${margin.left},${margin.top})`);
/* Axes ----------------------------------------------------------------------*/
// The plot starts out showing the normal density.
initial_density = normal_density;
// Horizontal scale: 0 .. largest value in `heights`, mapped onto the panel width.
x = d3.scaleLinear()
  .domain([0, Math.max(...heights)])
  .range([0, width]);
// Bottom axis, moved down to the lower edge of the panel.
x_axis = svg.append("g")
  .attr("transform", `translate(0,${height})`)
  .call(d3.axisBottom(x));
// Vertical scale: 0 .. 125% of the density peak (rounded to two decimals), so
// the curve keeps some headroom. The range is inverted because SVG's y grows
// downwards.
y = d3.scaleLinear()
  .domain([0, Math.round(Math.max(...initial_density) * 1.25 * 100) / 100])
  .range([height, 0]);
y_axis = svg.append("g").call(d3.axisLeft(y));
/* Curve ---------------------------------------------------------------------*/
// Pair every height with its density value as [x, y]. The extra [0, 0] and
// [70, 0] points are added to every density so the curve starts and ends on
// the baseline.
density = d3.zip(
  [0, ...heights, 70],
  [0, ...initial_density, 0]
);
// A single <path> draws the whole curve, hence `.datum()` (one datum holding
// *all* points) rather than `.data()`.
curve = svg
  .append("g")
  .append("path")
  .datum(density)
  .attr("stroke", green)
  .attr("stroke-width", 2)
  .attr("stroke-linejoin", "round")
  .attr("fill", green)
  .attr("fill-opacity", 0.5)
  .attr("d", d3.line()
    .curve(d3.curveBasis)
    .x(([tree_height]) => x(tree_height))
    .y(([, probability]) => y(probability))
  );
// Create the <canvas> that the forest is drawn on, inside
// #forest-canvas-container.
canvas = d3.select("#forest-canvas-container").append("canvas");
// The context carries all of the information *about* the <canvas>, i.e. what
// to draw and where
context = canvas.node().getContext("2d");
// NOTE(review): changing a canvas's width/height resets its 2D context state
// (per the HTML canvas spec), so this alignment may be lost once the sizing
// chain below has run - confirm the text is still centered when drawn.
context.textAlign = "center";
// Logical coordinate space of the forest; tree positions and the clear
// rectangle are expressed in these units.
canvas_width = 1000;
canvas_height = 700;
// Total padding per axis; half of it is applied on each side when the forest
// positions are computed.
canvas_width_padding = 100;
canvas_height_padding = 100;
// Base font size; each tree multiplies it by its own `font_scale`.
canvas_font_size = 80;
// Device pixel ratio, falling back to 1 when it is missing or zero.
scale = window.devicePixelRatio || 1;
canvas
// Adjust (e.g. upscale) the canvas dimensions by the `scale`
.attr("width", canvas_width * scale)
.attr("height", canvas_height * scale)
// .style("width", `${canvas_width}px`)
// .style("height", `${canvas_height}px`);
// Let CSS size the element responsively; the attributes above only set the
// resolution of the backing bitmap.
.style("width", "100%")
.style("height", "auto")
// Convert the raw forest layout into canvas coordinates. Each axis is scaled to
// the canvas size minus its padding and then shifted by half of that same
// padding, which centers the forest inside the canvas.
// (Presumably the raw x/y values are fractions in [0, 1] - confirm against the
// code that produces `forest_positions_raw`.)
forest_positions =
  transpose(forest_positions_raw)
    .map(d => ({
      x: d.x * (canvas_width - canvas_width_padding) + (canvas_width_padding / 2),
      // The vertical offset must use the *height* padding; using the width
      // padding only worked while both paddings happened to be equal.
      y: d.y * (canvas_height - canvas_height_padding) + (canvas_height_padding / 2),
      emoji: d.emoji,
      type: d.type,
      font_scale: d.font_scale
    }));
// This is the interface through which we use D3. We create an element, which we
// can data-bind to in the usual way, but we *don't* add it to the DOM. We use
// this container to *store* the data and use D3 to manipulate entering, exiting,
// and updating data as usual. We pass this information onto <canvas> for drawing.
content_container = document.createElement("content");
// Append one detached <text> node per forest item. Always use attributes rather
// than `.style()`, since the nodes only exist to tell <canvas> what to draw.
// Items whose `type` is "tree" get the `tree` class so they can be selected
// (and resized) separately from the rest of the forest.
content = d3.select(content_container)
  .selectAll()
  .data(forest_positions)
  .join("text")
  .attr("font-size", d => canvas_font_size * d.font_scale)
  .attr("x", d => d.x)
  .attr("y", d => d.y)
  .text(d => d.emoji)
  .classed("tree", d => (d.type === "tree"));
// A leftover debug statement that logged `trees.nodes()` was removed here:
// no top-level `trees` exists (it is only a local inside `update`), so the
// statement could only fail with an undefined reference.
// Render the current state of `content` onto the <canvas>.
//
// Takes no arguments and returns nothing; it reads the detached <text> nodes in
// `content` (font-size, x, y and text) and paints each one with `fillText`.
draw = function() {
  // The canvas bitmap is `scale` times larger than the logical
  // canvas_width x canvas_height space that the tree positions use, so map
  // logical units onto bitmap pixels. `setTransform` is absolute (it replaces
  // the current matrix), which makes it safe to repeat on every frame.
  context.setTransform(scale, 0, 0, scale, 0, 0);
  // Resizing a canvas resets its context state, so the alignment is set
  // here instead of relying on it having survived from initialization.
  context.textAlign = "center";
  // Clear the canvas
  context.clearRect(0, 0, canvas_width, canvas_height);
  // Render every element to the <canvas>
  content.each(function() {
    const node = d3.select(this);
    context.font = `${node.attr("font-size")}px monospace`;
    context.fillText(node.text(), node.attr("x"), node.attr("y"));
  });
};
// Lookup table from the distribution name shown on each button to its density
// values. The key order is the order in which the buttons are created.
densities = ({
  "Normal": normal_density,
  "Uniform": uniform_density,
  "Point": point_density,
  "Gamma": left_gamma_density,
  "Reverse Gamma": right_gamma_density,
  "Bimodal": bimodal_density
});
// Linearly map `x` from the interval [x_min, x_max] onto
// [target_min, target_max].
// https://stats.stackexchange.com/questions/281162/scale-a-number-between-a-range
//
// When the source interval is degenerate (x_min === x_max) there is no slope
// to apply; instead of dividing by zero (NaN / Infinity) the midpoint of the
// target interval is returned.
rescale = function(x, x_min, x_max, target_min, target_max) {
  const source_span = x_max - x_min;
  if (source_span === 0) {
    return (target_min + target_max) / 2;
  }
  return ((x - x_min) / source_span) * (target_max - target_min) + target_min;
};
// Rescale every element of `array` so that its smallest value maps to
// `target_min` and its largest to `target_max`. Returns a new array; the input
// is not modified.
rescale_array = function(array, target_min, target_max) {
  // Find both extremes in one pass. Spreading the whole array into
  // Math.min/Math.max passes every element as a function argument, which can
  // exceed the engine's argument limit for large samples.
  let array_min = Infinity;
  let array_max = -Infinity;
  for (const value of array) {
    array_min = Math.min(array_min, value);
    array_max = Math.max(array_max, value);
  }
  return array.map(x => rescale(x, array_min, array_max, target_min, target_max));
};
// Samples are rescaled into a font-size range expressed relative to the canvas
// font size, because the font size is what controls how "tall" a tree is drawn.
min_sample_value = canvas_font_size * 0.5;
max_sample_value = canvas_font_size * 1.75;
// A point mass has min == max and therefore cannot be rescaled. The normal and
// the point sample should share the same mean, so every point-sample tree is
// simply given the mean height of the rescaled normal sample.
scaled_normal_mean =
  rescale_array(normal_sample, min_sample_value, max_sample_value)
    .reduce((total, value) => total + value) / normal_sample.length;
scaled_point_sample = point_sample.map(() => scaled_normal_mean);
// Lookup table from distribution name to the font sizes of its sampled trees.
// Every sample is rescaled into [min_sample_value, max_sample_value] so it
// yields reasonable font sizes; the point sample is the exception and uses the
// precomputed `scaled_point_sample`.
samples = (() => {
  const fit = (raw) => rescale_array(raw, min_sample_value, max_sample_value);
  return {
    "Normal": fit(normal_sample),
    "Uniform": fit(uniform_sample),
    "Point": scaled_point_sample,
    "Gamma": fit(left_gamma_sample),
    "Reverse Gamma": fit(right_gamma_sample),
    "Bimodal": fit(bimodal_sample)
  };
})();
// One clickable <div class="button-filter"> per key of `densities`, laid out as
// a flex row inside #density-button-container. Each button holds a <p> whose
// text is the distribution name.
buttons = d3.select("#density-button-container")
  .style("display", "flex")
  .style("gap", "10px")
  .selectAll("density-button")
  .data(Object.keys(densities))
  .enter()
  .append("div")
  .attr("class", "button-filter")
  .call((div) => div.append("p")
    .style("margin", "0px")
    .text((key) => key)
  )
  .on("click", function() {
    const clicked = d3.select(this);
    // Only the clicked button may be marked as active.
    d3.selectAll(".button-filter").classed("active", false);
    clicked.classed("active", true);
    // The button's label doubles as the key into `densities` / `samples`.
    update(clicked.select("p").text());
  });
// Switch both visualisations to `selected_distribution` (a key of `densities`
// and `samples`): the density plot animates to the new curve and y-axis, and
// the trees in the forest animate to the font sizes of the matching sample.
update = function(selected_distribution) {
  const duration_ms = 1000;
  const next_density = densities[selected_distribution];
  const next_sample = samples[selected_distribution];

  // DENSITY PLOT
  // Same [x, y] layout as the initial curve, including the two baseline points.
  const points = d3.zip(
    [0, ...heights, 70],
    [0, ...next_density, 0]
  );

  // New y scale with 25% headroom above the peak of the selected density.
  const peak = Math.max(...next_density);
  const y_scale = d3.scaleLinear()
    .domain([0, Math.round(peak * 1.25 * 100) / 100])
    .range([height, 0]);

  // Transition the existing y-axis into the new one
  y_axis.transition().duration(duration_ms).call(d3.axisLeft(y_scale));

  // Morph the curve's "d" attribute into the outline of the new density.
  const outline = d3.line()
    .curve(d3.curveBasis)
    .x(([tree_height]) => x(tree_height))
    .y(([, probability]) => y_scale(probability));
  curve.datum(points).transition().duration(duration_ms).attr("d", outline);

  // FOREST
  // Animate the font size of every tree node towards its sampled value; the
  // i-th tree takes the i-th element of the sample.
  d3.select(content_container)
    .selectAll("text.tree")
    .transition()
    .duration(duration_ms)
    .attr("font-size", (_, i) => next_sample[i]);

  // Redraw the <canvas> on every animation frame while the transition above is
  // running. `draw()` reads the detached nodes, so it picks up the in-between
  // font sizes automatically. The stop check comes after the draw call, so one
  // more frame is still drawn once the duration has elapsed.
  const frame_timer = d3.timer((elapsed) => {
    draw();
    if (elapsed > duration_ms) {
      frame_timer.stop();
    }
  });
};
// Initialize the forest to match the normal density
update("Normal");