# Plot a random sample of 5k articles.
# random_state is pinned so the same rows are drawn on every run.
# Channels: x = TIMESTAMP, y = compound, colour = compound (quantitative) on
# the 'redyellowgreen' scheme; both axis titles are blanked out.
scatter = alt.Chart(df.sample(n=5000, random_state=1)).mark_point().encode(
    x=alt.X("TIMESTAMP", axis=alt.Axis(title="")),
    y=alt.Y("compound", axis=alt.Axis(title="")),
    color=alt.Color("compound:Q", scale=alt.Scale(scheme="redyellowgreen")),
    tooltip=["TITLE", "PUBLISHER", "compound:Q", "TIMESTAMP"],
)
# Get the 10 largest publishers, ranked by how many non-null IDs each has.
# PUBLISHER is kept as a regular column (reset_index) because the line chart
# further down filters with `largest_10.PUBLISHER`.
largest_10 = (
    df.groupby(by=["PUBLISHER"])["ID"]
    .count()                           # rows per publisher
    .reset_index()                     # columns: PUBLISHER, ID
    .rename(columns={"ID": "count"})
    .nlargest(10, "count")
)
# Bucket every timestamp into a 30-day period by flooring it, and keep the
# result in a new "date" column.
df["date"] = df["TIMESTAMP"].dt.floor("30D")
# Line chart built only from rows whose PUBLISHER appears in largest_10.
# NOTE(review): the closing parenthesis of `.encode(` is not visible in this
# excerpt — confirm the statement is terminated after the tooltip argument.
line = alt.Chart(df[df.PUBLISHER.isin(largest_10.PUBLISHER)]).mark_line(clip=True).encode(
# x: the "date" column, with the axis title blanked out.
alt.X("date", axis=alt.Axis(title="")),
# y: mean of `compound`, with the scale domain fixed to (-0.15, 0.15);
# clip=True on the mark presumably hides line segments outside that domain.
y=alt.Y('average(compound)', axis=alt.Axis(title=""), scale=alt.Scale(domain=(-0.15, 0.15))),
# One colour per publisher; PUBLISHER is encoded as ordinal (:O).
color=alt.Color('PUBLISHER:O'),
tooltip=['PUBLISHER','average(compound):Q', 'date']