Advanced Calculations
Calculate Percentage of Total
using VegaLite, DataFrames
data = DataFrame(
Activity=["Sleeping","Eating","TV","Work","Exercise"],
Time=[8,2,4,8,2]
)
data |>
@vlplot(
height={step=12},
:bar,
transform=[
{
window=[{op="sum",field="Time",as="TotalTime"}],
frame=[nothing,nothing]
},
{
calculate="datum.Time/datum.TotalTime * 100",
as="PercentOfTotal"
}
],
x={"PercentOfTotal:q", axis={title="% of total Time"}},
y={"Activity:n"}
)
A bar graph showing what activites consume what percentage of the day.
Calculate Difference from Average
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
transform=[
{filter="datum.IMDB_Rating != null"},
{
joinaggregate= [{
op=:mean,
field=:IMDB_Rating,
as="AverageRating"
}]
},
{filter="(datum.IMDB_Rating - datum.AverageRating) > 2.5"}
]
) +
@vlplot(
:bar,
x={"IMDB_Rating:q",axis={title="IMDB Rating"}},
y={"Title:o"}
) +
@vlplot(
mark={
:rule,
color="red"
},
x={"AverageRating:q", aggregate="average"}
)
Calculate Difference from Annual Average
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
transform=[
{filter="datum.IMDB_Rating != null"},
{timeUnit="year",field="Release_Date",as="year"},
{
joinaggregate= [{
op=:mean,
field=:IMDB_Rating,
as="AverageYearRating"
}],
groupby=["year"]
},
{
filter="(datum.IMDB_Rating - datum.AverageYearRating) > 2.5"
}
]
) +
@vlplot(
mark={:bar,clip=true},
x={"IMDB_Rating:q",axis={title="IMDB Rating"}},
y={"Title:o"}
) +
@vlplot(
mark=:tick,
color={value="red"},
x="AverageYearRating:q",
y="Title:o"
)
Bar graph showing the best films for the year they were produced, where best is defined by at least 2.5 points above average for that year. The red point shows the average rating for a film in that year, and the bar is the rating that the film recieved.
Calculate Residuals
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
transform=[
{filter="datum.IMDB_Rating != null"},
{filter={timeUnit="year",field="Release_Date",range=[1900,2019]}},
{
joinaggregate=[{
op="mean",
field="IMDB_Rating",
as="AverageRating"
}]
},
{
calculate="datum.IMDB_Rating - datum.AverageRating",
as="RatingDelta"
}
],
:point,
x="Release_Date:t",
y={"RatingDelta:q", axis={title="Rating Delta"}},
color={"RatingDelta:q",scale={domainMid=0},title="Rating Delta"}
)
A dot plot showing each movie in the database, and the difference from the average movie rating. The display is sorted by year to visualize everything in sequential order. The graph is for all Movies before 2019.
Line Charts Showing Ranks Over Time
using VegaLite, DataFrames
data = DataFrame(
team=["Germany", "Mexico", "South Korea", "Sweden", "Germany", "Mexico",
"South Korea", "Sweden", "Germany", "Mexico", "South Korea", "Sweden"],
matchday=[1,1,1,1,2,2,2,2,3,3,3,3],
point=[0,3,0,3,3,6,0,3,3,6,3,6],
diff=[-1,1,-1,1,0,2,-2,0,-2,-1,0,3]
)
data |>
@vlplot(
title={text="World Cup 2018: Group F Rankings"},
transform=[{
sort=[
{field="point", order="descending"},
{field="diff", order="descending"}
],
window=[{
op="rank",
as="rank"
}],
groupby=["matchday"]
}],
mark={
:line,
orient="vertical"
},
x="matchday:o",
y="rank:o",
color={
:team,
scale={
domain=["Germany", "Mexico", "South Korea", "Sweden"],
range=["black", "#127153", "#C91A3C", "#0C71AB"]
}
}
)
Waterfall Chart of Monthly Profit and Loss
using VegaLite, DataFrames
data = DataFrame(
label=["Begin", "Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec", "End"],
amount=[4000,1707,-1425,-1030,1812,-1067,-1481,1228,1176,1146,1205,-1388,1492,0]
)
data |>
@vlplot(
width=800,
height=450,
transform=[
{window=[{op="sum",field="amount",as="sum"}]},
{window=[{op="lead",field="label",as="lead"}]},
{
calculate="datum.lead === null ? datum.label : datum.lead",
as="lead"
},
{
calculate="datum.label === 'End' ? 0 : datum.sum - datum.amount",
as="previous_sum"
},
{
calculate="datum.label === 'End' ? datum.sum : datum.amount",
as="amount"
},
{
calculate="(datum.label !== 'Begin' && datum.label !== 'End' && datum.amount > 0 ? '+' : '') + datum.amount",
as="text_amount"
},
{
calculate="(datum.sum + datum.previous_sum) / 2",
as="center"
},
{
calculate="datum.sum < datum.previous_sum ? datum.sum : ''",
as="sum_dec"
},
{
calculate="datum.sum > datum.previous_sum ? datum.sum : ''",
as="sum_inc"
}
],
x={"label:o",sort=nothing,axis={labelAngle=0,title="Months"}},
config={
text={
fontWeight="bold",
color="#404040"
}
}
) +
@vlplot(
mark={:bar,size=45},
y={"previous_sum:q",title="Amount"},
y2=:sum,
color={
condition=[
{test="datum.label === 'Begin' || datum.label === 'End'",value="#f7e0b6"},
{test="datum.sum < datum.previous_sum",value="#f78a64"}
],
value="#93c4aa"
}
) +
@vlplot(
mark={
:rule,
color="#404040",
opacity=1,
strokeWidth=2,
xOffset=-22.5,
x2Offset=22.5
},
x2=:lead,
y="sum:q"
) +
@vlplot(
mark={
:text,
dy=-4,
baseline="bottom"
},
y="sum_inc:q",
text="sum_inc:n"
) +
@vlplot(
mark={
:text,
dy=4,
baseline="top"
},
y="sum_dec:q",
text="sum_dec:n"
) +
@vlplot(
mark={
:text,
fontWeight="bold",
baseline="middle"
},
y="center:q",
text="text_amount:n",
color={
condition=[
{test="datum.label === 'Begin' || datum.label === 'End'",value="#725a30"}
],
value="white"
}
)
Filtering Top-K Items
using VegaLite, DataFrames
data = DataFrame(
student=["A","B","C","D","E","F","G","H","I","J","K","L","M","N","O","P","Q","R","S","T","U","V"],
score=[100,56,88,65,45,23,66,67,13,12,50,78,66,30,97,75,24,42,76,78,21,46]
)
data |>
@vlplot(
transform=[
{
window=[{ op="rank", as="rank" }],
sort=[{ field="score", order="descending" }]
},
{filter="datum.rank <= 5"}
],
mark=:bar,
x="score:q",
y={
"student:n",
sort={field="score",op="average",order="descending"}
}
)
Here we use window transform to derive the total number of students along with the rank of the current student to determine the top K students and display their score.
Top-K Plot with "Others"
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
title="Top Directors by Average Worldwide Gross",
transform=[
{
aggregate=[{op="mean",field="Worldwide_Gross",as="aggregate_gross"}],
groupby=["Director"]
},
{
window=[{op="row_number", as="rank"}],
sort=[{field="aggregate_gross",order="descending"}]
},
{
calculate="datum.rank < 10 ? datum.Director : 'All Others'", as="ranked_director"
}
],
:bar,
x={aggregate="mean","aggregate_gross:q",title=nothing},
y={
sort={op="mean",field="aggregate_gross",order="descending"},
"ranked_director:o",
title=nothing
}
)
Top-K plot with \"others\" by Trevor Manz, adapted from https://observablehq.com/@manzt/top-k-plot-with-others-vega-lite-example.
Using the lookup transform to combine data
using VegaLite, VegaDatasets
dataset("lookup_groups") |>
@vlplot(
transform=[
{
lookup="person",
from={
data=dataset("lookup_people"),
key="name",
fields=["age","height"]
}
}
],
:bar,
x="group:o",
y={"age:q",aggregate="mean"}
)
Cumulative Frequency Distribution
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
:area,
transform=[{
sort=[{field=:IMDB_Rating}],
window=[{field=:count,op="count",as="cumulative_count"}],
frame=[nothing,0]
}],
x="IMDB_Rating:q",
y="cumulative_count:q"
)
Layered Histogram and Cumulative Histogram
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
transform=[
{bin=true,field=:IMDB_Rating,as="bin_IMDB_Rating"},
{
aggregate=[{op=:count,as="count"}],
groupby=["bin_IMDB_Rating", "bin_IMDB_Rating_end"]
},
{filter="datum.bin_IMDB_Rating !== null"},
{
sort=[{field=:bin_IMDB_Rating}],
window=[{field=:count,op="sum",as="cumulative_count"}],
frame=[nothing,0]
}
],
x={"bin_IMDB_Rating:q",scale={zero=false},title="IMDB Rating"},
x2=:bin_IMDB_Rating_end
) +
@vlplot(
:bar,
y="cumulative_count:q"
) +
@vlplot(
mark={:bar,color=:yellow,opacity=0.5},
y="count:q"
)
Parallel Coordinate Plot
using VegaLite, VegaDatasets
dataset("iris") |>
@vlplot(
width=600,
height=300,
config={
axisX={domain=false, labelAngle=0, tickColor="#ccc", "title"=nothing},
view={stroke=nothing},
style={
label={baseline="middle", align="right", dx=-5},
tick={orient="horizontal"}
}
},
transform=[
{ window=[{op="count", as="index" }] },
{fold=["petalLength", "petalWidth", "sepalLength", "sepalWidth"]},
{
joinaggregate=[
{op="min",field="value",as="min"},
{op="max",field="value",as="max"}
],
groupby=["key"]
},
{
calculate="(datum.value - datum.min) / (datum.max-datum.min)",
as="norm_val"
},
{
calculate="(datum.min + datum.max) / 2",
as="mid"
},
{
calculate="'petalLength: '+datum.petalLength+', '+'petalWidth: '+datum.petalWidth+', sepalLength: '+datum.sepalLength+', '+'sepalWidth: '+datum.sepalWidth",
as="tt"
} #How to add a line break?
]
) +
@vlplot(
mark={:rule,color="#ccc"},
detail={aggregate="count",type="quantitative"},
x="key:n"
) +
@vlplot(
mark=:line,
color={field="species",type="nominal"},
detail={field="index",type="nominal"},
opacity={value=0.3},
x="key:n",
y={"norm_val:q",axis=nothing},
tooltip={"tt:n"}
) +
@vlplot(
mark={:text,style="label"},
text={aggregate="max",field="max",type="quantitative"},
x="key:n",
y={value=0}
) +
@vlplot(
mark={:tick,style="tick",size=8,color="#ccc"},
x="key:n",
y={value=0}
) +
@vlplot(
mark={:text,style="label"},
text={aggregate="min",field="mid",type="quantitative"},
x="key:n",
y={value=150}
) +
@vlplot(
mark={:tick,style="tick",size=8,color="#ccc"},
x="key:n",
y={value=150}
) +
@vlplot(
mark={:text,style="label"},
text={aggregate="min",field="min",type="quantitative"},
x="key:n",
y={value=300}
) +
@vlplot(
mark={:tick,style="tick",size=8,color="#ccc"},
x="key:n",
y={value=300}
)
Though Vega-Lite supports only one scale per axes, one can create a parallel coordinate plot by folding variables, using joinaggregate
to normalize their values and using ticks and rules to manually create axes.
Bar Chart Showing Argmax Value
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
mark=:bar,
x={aggregate={argmax="US_Gross"},"Production_Budget:q"},
y={"Major_Genre:n"}
)
The production budget of the movie that has the highest US Gross in each major genre.
Layering Averages over Raw Values
using VegaLite, VegaDatasets
dataset("stocks") |>
@vlplot(
transform=[{filter="datum.symbol==='GOOG'"}]
) +
@vlplot(
mark={:point,opacity=0.3},
x={timeUnit="year","date:t"},
y="price:q"
) +
@vlplot(
mark=:line,
x={timeUnit="year","date:t"},
y={aggregate="mean","price:q"}
)
Plot showing average data with raw values in the background.
Layering Rolling Averages over Raw Values
using VegaLite, VegaDatasets
dataset("seattle-weather") |>
@vlplot(
width=400,
height=300,
transform=[{
frame=[-15,15],
window=[{field="temp_max",op="mean",as="rolling_mean"}]
}]
) +
@vlplot(
mark={:point,opacity=0.3},
x={"date:t",title="Date"},
y={"temp_max:q",title="Max Temperature"}
) +
@vlplot(
mark={:line,size=3,color="red"},
x={"date:t",title="Date"},
y={"rolling_mean:q"}
)
Plot showing a 30 day rolling average with raw values in the background.
Line Chart to Show Benchmarking Results
using VegaLite, DataFrames
data=DataFrame(
falcon= [16.81999969482422,19.759998321533203,16.079999923706055,19.579999923706055,16.420000076293945,16.200000762939453,16.020000457763672,15.9399995803833,16.280000686645508,16.119998931884766,16.15999984741211,16.119998931884766,16.139999389648438,16.100000381469727,16.200000762939453,16.260000228881836,19.35999870300293,19.700000762939453,15.9399995803833,19.139999389648438,16.200000762939453,16.119998931884766,19.520000457763672,19.700000762939453,16.200000762939453,20.979999542236328,16.299999237060547,16.420000076293945,16.81999969482422,16.5,16.560001373291016,16.18000030517578,16.079999923706055,16.239999771118164,16.040000915527344,16.299999237060547,19.399999618530273,15.699999809265137,16.239999771118164,15.920000076293945,16.259998321533203,16.219999313354492,16.520000457763672,16.459999084472656,16.360000610351562,15.719999313354492,16.060001373291016,15.960000991821289,16.479999542236328,16.600000381469727,16.240001678466797,16.940000534057617,16.220001220703125,15.959999084472656,15.899999618530273,16.479999542236328,16.31999969482422,15.75999927520752,15.999998092651367,16.18000030517578,16.219999313354492,15.800000190734863,16.139999389648438,16.299999237060547,16.360000610351562,16.260000228881836,15.959999084472656,15.9399995803833,16.53999900817871,16.139999389648438,16.259998321533203,16.200000762939453,15.899999618530273,16.079999923706055,16.079999923706055,15.699999809265137,15.660000801086426,16.139999389648438,23.100000381469727,16.600000381469727,16.420000076293945,16.020000457763672,15.619999885559082,16.35999870300293,15.719999313354492,15.920001029968262,15.5600004196167,16.34000015258789,22.82000160217285,15.660000801086426,15.5600004196167,16,16,15.819999694824219,16.399999618530273,16.46000099182129,16.059999465942383,16.239999771118164,15.800000190734863,16.15999984741211,16.360000610351562,19.700000762939453,16.10000228881836,16.139999389648438,15.819999694824219,16.439998626708984,16.139999389648438,16.020000457763672,15.860000610351562,16.059999465942383,16.020000457763672,15.920000076293945,15.819999694824219,16.579999923706055,15.880000114440918,16.579999923706055,15.699999809265137,19.380001068115234,19.239999771118164,16,15.980000495910645,15.959999084472656,16.200000762939453,15.980000495910645,16.34000015258789,16.31999969482422,16.260000228881836,15.920000076293945,15.540000915527344,16.139999389648438,16.459999084472656,16.34000015258789,15.819999694824219,19.719999313354492,15.75999927520752,16.499998092651367,15.719999313354492,16.079999923706055,16.439998626708984,16.200000762939453,15.959999084472656,16,16.100000381469727,19.31999969482422,16.100000381469727,16.18000030517578,15.959999084472656,22.639999389648438,15.899999618530273,16.279998779296875,16.100000381469727,15.920000076293945,16.079999923706055,16.260000228881836,15.899999618530273,15.820001602172852,15.699999809265137,15.979998588562012,16.380001068115234,16.040000915527344,19.420000076293945,15.9399995803833,16.15999984741211,15.960000991821289,16.259998321533203,15.780000686645508,15.880000114440918,15.980000495910645,16.060001373291016,16.119998931884766,23.020000457763672,15.619999885559082,15.920000076293945,16.060001373291016,14.780000686645508,16.260000228881836,19.520000457763672,16.31999969482422,16.600000381469727,16.219999313354492,19.740001678466797,19.46000099182129,15.940000534057617,15.839999198913574,16.100000381469727,16.46000099182129,16.17999839782715,16.100000381469727,15.9399995803833,16.060001373291016,15.860000610351562,15.819999694824219,16.03999900817871,16.17999839782715,15.819999694824219,17.299999237060547,15.9399995803833,15.739999771118164,15.719999313354492,15.679998397827148,15.619999885559082,15.600000381469727,16.03999900817871,15.5,15.600001335144043,19.439998626708984,15.960000991821289,16.239999771118164,16.040000915527344,16.239999771118164],
square= [24.200000762939453,17.899999618530273,15.800000190734863,58.400001525878906,151,2523.10009765625,245.3000030517578,136,72.30000305175781,55.70000076293945,42.400001525878906,37.70000076293945,30.100000381469727,30.100000381469727,21.799999237060547,20.600000381469727,21.799999237060547,17.600000381469727,18.200000762939453,21,941.7000122070312,177.39999389648438,2821.800048828125,359.20001220703125,318,217.10000610351562,126,69,57.79999923706055,45.29999923706055,35.599998474121094,29.100000381469727,23.799999237060547,44.20000076293945,17.700000762939453,17.700000762939453,15.699999809265137,27.799999237060547,22.799999237060547,3853.60009765625,91.5999984741211,181.39999389648438,476.29998779296875,265.8999938964844,254.60000610351562,2583.199951171875,124.80000305175781,73.19999694824219,56.400001525878906,48.70000076293945,41.599998474121094,21.100000381469727,20.299999237060547,21.299999237060547,18.299999237060547,17.100000381469727,19.5,828.2000122070312,162.1999969482422,217.89999389648438,205.5,197.60000610351562,2249.800048828125,103.0999984741211,71.69999694824219,57.599998474121094,41.400001525878906,34.5,22,20.5,21.700000762939453,18.299999237060547,17.299999237060547,19.399999618530273,666.7999877929688,214.89999389648438,212.3000030517578,125.80000305175781,67.69999694824219,56.099998474121094,45.79999923706055,38.29999923706055,33,35.400001525878906,22.700000762939453,19.399999618530273,19.899999618530273,24.100000381469727,19.299999237060547,21.299999237060547,3508.699951171875,204.10000610351562,125.4000015258789,65.30000305175781,60.79999923706055,44.099998474121094,36.29999923706055,30.5,28.600000381469727,16.5,18.600000381469727,23.700000762939453,22.299999237060547,17.600000381469727,19.200000762939453,448.79998779296875,124.4000015258789,66.5999984741211,53.5,51,45.20000076293945,28.399999618530273,29.200000762939453,26.700000762939453,25.899999618530273,18.100000381469727,17.600000381469727,20.100000381469727,25.200000762939453,3332,67.5,53.599998474121094,56.599998474121094,39.900001525878906,27.600000381469727,29.600000381469727,33.5,17.200000762939453,18.799999237060547,25.200000762939453,16.700000762939453,16.899999618530273,240.1999969482422,52.400001525878906,42.099998474121094,33.900001525878906,28,28.600000381469727,17.299999237060547,20,21,22.799999237060547,16.700000762939453,19.200000762939453,175.39999389648438,43.5,34.70000076293945,29.700000762939453,34.900001525878906,25.799999237060547,17.299999237060547,22.600000381469727,17.600000381469727,17.200000762939453,19.200000762939453,111.80000305175781,35.400001525878906,27.600000381469727,25.399999618530273,21.899999618530273,18.600000381469727,18.100000381469727,21.200000762939453,17.899999618530273,17,80.5999984741211,29.799999237060547,30.100000381469727,16,26.799999237060547,17.5,22.299999237060547,16.799999237060547,22.399999618530273,77.4000015258789,31,29.700000762939453,28.700000762939453,26,16.899999618530273,15.800000190734863,19,52.599998474121094,25.200000762939453,16.700000762939453,17.899999618530273,21,19.799999237060547,18.799999237060547,46.5,17.5,16.799999237060547,18.299999237060547,18.299999237060547,14.899999618530273,41,18.299999237060547,17.299999237060547,17,17.5,32.29999923706055,22.600000381469727,16.600000381469727,17.899999618530273,25.600000381469727,17.5,20.299999237060547,25.200000762939453,18.600000381469727,17.700000762939453]
)
data |>
@vlplot(
width=400,
height=200,
x={
"row:q",
title="Trial",
scale={nice=false},
axis={grid=false}
},
y={
"fps:q",
title="Frames Per Second (fps)",
scale={type="log"},
axis={grid=false}
},
color={
"system:n",
title="System",
legend={orient="bottom-right"}
},
"size"={value=1}
) +
@vlplot(
mark=:line,
transform=[
{window=[{field="falcon",op="row_number",as="row"}]},
{calculate="1000/datum.falcon",as="fps"},
{calculate="'Falcon'",as="system"}
]
) +
@vlplot(
mark=:line,
transform=[
{window=[{field="square",op="row_number",as="row"}]},
{calculate="1000/datum.square",as="fps"},
{calculate="'Square Crossfilter (3M)'",as="system"}
]
)
Quantile-Quantile Plot (QQ Plot)
using VegaLite, VegaDatasets
dataset("normal-2d") |>
@vlplot(
columns=2,
transform=[
{
quantile="u",
step=0.01,
as=["p","v"]
},
{
calculate="quantileUniform(datum.p)",as="unif"
},
{
calculate="quantileNormal(datum.p)",as="norm"
}
]
) + [
@vlplot(
mark=:point,
x="unif:q",
y="v:q"
);
@vlplot(
mark=:point,
x="norm:q",
y="v:q"
)
]
Linear Regression
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
layer=[{
mark={:point,filled=true},
x="Rotten_Tomatoes_Rating:q",
y="IMDB_Rating:q"
},
{
transform=[
{
regression="IMDB_Rating",
on="Rotten_Tomatoes_Rating"
}
],
mark={:line,color="firebrick"},
x="Rotten_Tomatoes_Rating:q",
y="IMDB_Rating:q"
},
{
transform=[
{
regression="IMDB_Rating",
on="Rotten_Tomatoes_Rating",
params=true
},
{
calculate="'R²: '+format(datum.rSquared, '.2f')",
as="R2"
}
],
mark={:text,color="firebrick",x="width",align="right",y=-5},
text={"R2:n"}
}]
)
Loess Regression
using VegaLite, VegaDatasets
dataset("movies") |>
@vlplot(
layer=[{
mark={:point,filled=true},
x="Rotten_Tomatoes_Rating:q",
y="IMDB_Rating:q"
},
{
transform=[
{
loess="IMDB_Rating",
on="Rotten_Tomatoes_Rating"
}
],
mark={:line,color="firebrick"},
x="Rotten_Tomatoes_Rating:q",
y="IMDB_Rating:q"
}]
)