plotting a histogram using a range of values and their frequency as a dictionary

  • Last Update :
  • Techknowledgy :

Since the bins (ranges) are already defined and their counts are already aggregated at an initial level, maybe it can help if you build something that overlays a histogram (distribution) on the top of the existing bin ranges:

import matplotlib
%matplotlib inline
def plot_hist(bins, input_dict):
    """Merge pre-aggregated range counts into coarser groups and plot them as bars.

    Args:
        bins: Number of equal-width groups (or explicit bin edges) handed to
            ``pandas.cut`` to merge the existing ranges.
        input_dict: Mapping with a ``'Day1'`` entry whose value maps
            ``'low-high'`` range labels (integer bounds) to counts.

    Each merged group is drawn as one bar, labelled with the smallest lower
    bound and the largest upper bound of the ranges it absorbed; the bar
    height is the sum of their counts.
    """
    # Imported locally so the function works on its own: the surrounding
    # snippet only imports matplotlib.
    import pandas as pd

    df1 = pd.DataFrame(input_dict).reset_index()

    # Split every 'low-high' label into its two integer bounds.
    bounds = df1['index'].str.split('-', expand=True)
    df1['min'] = bounds[0].astype(int)
    df1['max'] = bounds[1].astype(int)

    # Assign each original range to a coarser group based on its upper bound.
    df1['group'] = pd.cut(df1['max'], bins, labels=False)

    df2 = (
        df1.groupby('group')[['Day1', 'min', 'max']]
        .agg({'min': 'min', 'max': 'max', 'Day1': 'sum'})
        .reset_index()
    )
    df2['range_new'] = df2['min'].astype(str) + '-' + df2['max'].astype(str)
    df2.plot(x='range_new', y='Day1', kind='bar')

...and call the function with fewer bins than the 98 first-level bins that are already in the dictionary. For example, if you want the distribution aggregated into 20 groups:

# Collapse the original ranges into 20 groups and draw the bar chart.
plot_hist(bins=20, input_dict=scenario_summary)

The code with your data, as a MCVE :

import matplotlib.pyplot as plt

# Day1 histogram: 98 contiguous bins, each 126 wide, keyed by 'low-high'.
# Only bins 28..43 hold observations; every other bin has a count of 0.0.
_first_edge, _bin_width, _bin_total = 22459, 126, 98
_day1_counts = [0.0] * _bin_total
_day1_counts[28:44] = [
    1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 5.0, 12.0,
    19.0, 7.0, 11.0, 15.0, 7.0, 4.0, 4.0, 2.0,
]

scenario_summary = {
    'Day1': {
        '{}-{}'.format(
            _first_edge + i * _bin_width,
            _first_edge + (i + 1) * _bin_width,
        ): count
        for i, count in enumerate(_day1_counts)
    }
}

# Range label -> count mapping for the single 'Day1' series.
data = scenario_summary['Day1']

# One bar per range, in dictionary order.
labels = list(data)
x = range(len(labels))
y = [data[label] for label in labels]

fig = plt.figure(figsize=(16, 9))
plt.bar(x, y)
# Leave extra room at the bottom for the vertical tick labels.
fig.subplots_adjust(bottom=0.2)
plt.xticks(x, labels, rotation='vertical')
plt.show()

Suggestion : 2

Last updated on May 16, 2021

# Two equivalent ways to create an empty dictionary.
our_dict = {}
# creating an instance of the class dictionary
our_dict = dict()

# creating a dictionary literal
marks = {
    "Alan": 92,
    "Turing": 88,
}

# Building the same dictionary one key at a time.
marks = {}

marks["Alan"] = 92
marks["Turing"] = 88

print(marks)

# {'Alan': 92, 'Turing': 88}

# Sample input so the counting snippets below run on their own; replace it
# with your own list of words.
document = ["the", "cat", "sat", "on", "the", "mat"]

# Counting with a plain dict: the first occurrence of a word must create its
# key, later occurrences increment it.
word_counts = {}

for word in document:
    if word in word_counts:
        word_counts[word] += 1
    else:
        word_counts[word] = 1

from collections import defaultdict

# defaultdict(int) supplies 0 for a missing key, so no membership test is
# needed before incrementing.
word_counts = defaultdict(int)
for word in document:
    word_counts[word] += 1

from collections import Counter

# Counter tallies the occurrences of each element in one call.
list1 = [1, 2, 1, 2, 3, 4, 5, 2, 3, 4, 5, 5, 1, 2]
counts = Counter(list1)
print(counts)

# Counter({2: 4, 1: 3, 5: 3, 3: 2, 4: 2})

Suggestion : 3

A histogram is basically a simple bar chart, where each bar represents a bin (usually in the form of a range) and a frequency of the elements that fall into that bin. Since the bins (ranges) are already defined and their counts are already aggregated at an initial level, maybe it can help if you build something that overlays a histogram (distribution) on top of the existing bin ranges. Plotting a histogram using a range of values and their frequency as a dictionary. ...and call the function with fewer bins than the 98 first-level bins that are already in the dictionary. For example, if you want the distribution aggregated into 20 groups:

The code with your data, as a MCVE :

import matplotlib.pyplot as plt

# Day1 histogram: 98 contiguous bins, each 126 wide, keyed by 'low-high'.
# Only bins 28..43 hold observations; every other bin has a count of 0.0.
_first_edge, _bin_width, _bin_total = 22459, 126, 98
_day1_counts = [0.0] * _bin_total
_day1_counts[28:44] = [
    1.0, 1.0, 0.0, 2.0, 5.0, 5.0, 5.0, 12.0,
    19.0, 7.0, 11.0, 15.0, 7.0, 4.0, 4.0, 2.0,
]

scenario_summary = {
    'Day1': {
        '{}-{}'.format(
            _first_edge + i * _bin_width,
            _first_edge + (i + 1) * _bin_width,
        ): count
        for i, count in enumerate(_day1_counts)
    }
}

# Range label -> count mapping for the single 'Day1' series.
data = scenario_summary['Day1']

# One bar per range, in dictionary order.
labels = list(data)
x = range(len(labels))
y = [data[label] for label in labels]

fig = plt.figure(figsize=(16, 9))
plt.bar(x, y)
# Leave extra room at the bottom for the vertical tick labels.
fig.subplots_adjust(bottom=0.2)
plt.xticks(x, labels, rotation='vertical')
plt.show()

Since the bins (ranges) are already defined and their counts are already aggregated at an initial level, maybe it can help if you build something that overlays a histogram (distribution) on the top of the existing bin ranges:

import matplotlib
%matplotlib inline
def plot_hist(bins, input_dict):
    """Merge pre-aggregated range counts into coarser groups and plot them as bars.

    Args:
        bins: Number of equal-width groups (or explicit bin edges) handed to
            ``pandas.cut`` to merge the existing ranges.
        input_dict: Mapping with a ``'Day1'`` entry whose value maps
            ``'low-high'`` range labels (integer bounds) to counts.

    Each merged group is drawn as one bar, labelled with the smallest lower
    bound and the largest upper bound of the ranges it absorbed; the bar
    height is the sum of their counts.
    """
    # Imported locally so the function works on its own: the surrounding
    # snippet only imports matplotlib.
    import pandas as pd

    df1 = pd.DataFrame(input_dict).reset_index()

    # Split every 'low-high' label into its two integer bounds.
    bounds = df1['index'].str.split('-', expand=True)
    df1['min'] = bounds[0].astype(int)
    df1['max'] = bounds[1].astype(int)

    # Assign each original range to a coarser group based on its upper bound.
    df1['group'] = pd.cut(df1['max'], bins, labels=False)

    df2 = (
        df1.groupby('group')[['Day1', 'min', 'max']]
        .agg({'min': 'min', 'max': 'max', 'Day1': 'sum'})
        .reset_index()
    )
    df2['range_new'] = df2['min'].astype(str) + '-' + df2['max'].astype(str)
    df2.plot(x='range_new', y='Day1', kind='bar')

...and call the function with fewer bins than the 98 first-level bins that are already in the dictionary. For example, if you want the distribution aggregated into 20 groups:

# Collapse the original ranges into 20 groups and draw the bar chart.
plot_hist(bins=20, input_dict=scenario_summary)

Suggestion : 4

February 23, 2019

If you want to mathematically split a given array into bins and frequencies, use the numpy histogram() method and pretty-print the result as shown below.

import numpy as np
# Sample of 100 random integers drawn from [0, 100).
x = np.random.randint(low=0, high=100, size=100)

# Compute frequency and bins: ten equal-width bins spanning 0..100.
frequency, bins = np.histogram(x, bins=10, range=[0, 100])

# Pretty Print: one row per bin, showing its upper edge followed by one
# star per observation that fell into it.
for upper_edge, count in zip(bins[1:], frequency):
    stars = ' '.join('*' * int(count))
    print(round(upper_edge, 1), stars)

The output of above code looks like this:

10.0 * * * * * * * * *
20.0 * * * * * * * * * * * * *
30.0 * * * * * * * * *
40.0 * * * * * * * * * * * * * * *
50.0 * * * * * * * * *
60.0 * * * * * * * * *
70.0 * * * * * * * * * * * * * * * *
80.0 * * * * *
90.0 * * * * * * * * *
100.0 * * * * * *

The pyplot.hist() function in matplotlib lets you draw the histogram. It requires the array as input, and you can specify the number of bins needed.

import matplotlib.pyplot as plt
%matplotlib inline
# Default figure size and resolution for the plots that follow.
plt.rcParams['figure.figsize'] = (7, 5)
plt.rcParams['figure.dpi'] = 100

# Plot Histogram on x: 1000 normally distributed samples in 50 bins.
x = np.random.normal(size=1000)
plt.hist(x, bins=50)
plt.title('Frequency Histogram')
plt.ylabel('Frequency');

Let’s compare the distribution of diamond depth for 3 different values of diamond cut in the same plot.

# Depth values for each of the three cuts being compared.
x1, x2, x3 = (
    df.loc[df.cut == grade, 'depth'] for grade in ('Ideal', 'Fair', 'Good')
)

# Shared styling; alpha=0.5 keeps the overlaid histograms visible.
kwargs = {'alpha': 0.5, 'bins': 100}

for sample, shade, grade in ((x1, 'g', 'Ideal'), (x2, 'b', 'Fair'), (x3, 'r', 'Good')):
    plt.hist(sample, **kwargs, color=shade, label=grade)

plt.title('Frequency Histogram of Diamond Depths')
plt.ylabel('Frequency')
plt.xlim(50, 75)
plt.legend();

You can normalize it by setting density=True and stacked=True. By doing this the total area under each distribution becomes 1.

# Normalize: same styling as before, plus density/stacked scaling.
kwargs = {'alpha': 0.5, 'bins': 100, 'density': True, 'stacked': True}

# Plot the three depth samples on the same axes.
for sample, shade, grade in zip((x1, x2, x3), 'gbr', ('Ideal', 'Fair', 'Good')):
    plt.hist(sample, **kwargs, color=shade, label=grade)

plt.title('Probability Histogram of Diamond Depths')
plt.ylabel('Probability')
plt.xlim(50, 75)
plt.legend();
# Solution
import seaborn as sns

df = sns.load_dataset('iris')
plt.subplots(figsize=(7, 6), dpi=100)

# One overlaid sepal-length distribution per species:
# (species value, colour, legend label).
# NOTE(review): distplot is deprecated in newer seaborn releases; confirm it
# is still available in the installed version.
for species, shade, legend_label in (
    ('setosa', "dodgerblue", "Setosa"),
    ('virginica', "orange", "virginica"),
    ('versicolor', "deeppink", "versicolor"),
):
    sns.distplot(
        df.loc[df.species == species, "sepal_length"],
        color=shade,
        label=legend_label,
    )

plt.title('Iris Histogram')
plt.legend();

Suggestion : 5

June 22, 2020 · March 8, 2022

Let’s begin by loading the required libraries and our dataset. We can then create histograms using Python on the age column, to visualize the distribution of that variable.

import pandas as pd
import matplotlib.pyplot as plt
# Load only the 'Age' column of the sample workbook.
df = pd.read_excel(
    io='https://github.com/datagy/Intro-to-Python/raw/master/sportsdata.xls',
    usecols=['Age'],
)

# Summary statistics of the column.
print(df.describe())

# Returns:
#                Age
# count  5000.000000
# mean     25.012200
# std       5.013849
# min       4.000000
# 25%      22.000000
# 50%      25.000000
# 75%      28.000000
# max      43.000000

The easiest way to create a histogram using Matplotlib, is simply to call the hist function:

# Histogram of the Age column with matplotlib's default binning.
plt.hist(x=df['Age'])

If you wanted to let your histogram have 9 bins, you could write:

# Same histogram, split into exactly 9 bins.
plt.hist(x=df['Age'], bins=9)

For example, if you wanted to exclude ages under 20, you could write:

# Explicit bin edges starting at 20, so ages below 20 are left out.
plt.hist(x=df['Age'], bins=[20, 25, 35, 40, 45, 50])

If your data has some bins with dramatically more data than other bins, it may be useful to visualize the data using a logarithmic scale. This can be accomplished using the log=True argument:

# Bin edges every 5 years from 0 to 50, with a logarithmic count axis.
plt.hist(x=df['Age'], bins=range(0, 55, 5), log=True)