1. Grouped Bar Charts: 100% Distribution Across Groups
import textwrap
from textwrap import wrap

import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'

# textwrap is kept available for wrapping long legend titles, e.g.
# "\n".join(textwrap.wrap(title, 20)).


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped vertical bars labelled with counts and shares of the grand total."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Every percentage below is relative to the total number of observations.
    total = data.sum()

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(14, 7))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    x = np.arange(len(responses))
    for i, group_data in enumerate(data):
        bars = ax.bar(x + i * bar_width, group_data, bar_width,
                      alpha=opacity, color=colors[i], label=group_names[i], align='edge')
        # Add frequency and percentage annotations above each bar
        for bar in bars:
            height = bar.get_height()
            x_pos = bar.get_x() + bar.get_width() / 2
            ax.annotate('{:.2f}%'.format(100 * height / total), (x_pos, height), rotation=90, xytext=(0, 15),
                        textcoords="offset points", ha="center", va="bottom", color='red')
            ax.annotate(str(int(height)), (x_pos, height), rotation=0, xytext=(0, 7),
                        textcoords="offset points", ha="center", va="center", color='green')

    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title
    ax.set_xlabel('Responses')
    ax.set_ylabel('Frequency')
    ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution Across Groups', size=12, pad=40)

    # Bars are edge-aligned, so a cluster spans len(group_names) * bar_width
    # to the right of x; put the tick in the middle of that span.
    ax.set_xticks(x + len(group_names) * bar_width / 2)
    ax.set_xticklabels(responses)

    # Create a legend box at the upper-right edge of the plot area
    ax.legend(title='Legend', bbox_to_anchor=(0.96, 1.04), loc='upper left')
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.94, textstr, fontsize=14, color='green')
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
2. Horizontal Bar Charts: 100% Distribution Across Groups
import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped horizontal bars labelled with counts and shares of the grand total."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Every percentage below is relative to the total number of observations.
    total = data.sum()

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(10, 10))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    n_groups = len(group_names)
    x = np.arange(len(responses))
    # Horizontal clusters grow upwards, so draw the groups last-to-first: the
    # first group then sits at the top of each cluster. Counts, colour and
    # label are all picked with the same group index so they cannot drift apart.
    for slot, g in enumerate(reversed(range(n_groups))):
        bars = ax.barh(x + slot * bar_width, data[g], bar_width,
                       alpha=opacity, color=colors[g], label=group_names[g])
        # Add frequency and percentage annotations at the end of each bar
        for bar in bars:
            width = bar.get_width()
            y_pos = bar.get_y() + bar.get_height() / 2
            ax.annotate('{:.2f}%'.format(100 * width / total), (width, y_pos), xytext=(12, 0),
                        textcoords="offset points", ha="left", va="center", color='red')
            ax.annotate(str(int(width)), (width, y_pos), rotation=0, xytext=(6, 0),
                        textcoords="offset points", ha="center", va="center", color='green')

    for side in ('right', 'left', 'top'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title (percentages are shares of the grand total)
    ax.set_xlabel('Frequency')
    ax.set_ylabel('Responses')
    ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution Across Groups', size=12, pad=20)

    # Bars are centre-aligned, so the middle of a cluster is halfway between
    # the first and the last bar centre.
    ax.set_yticks(x + (n_groups - 1) * bar_width / 2)
    ax.set_yticklabels(responses)

    # Bars were added last-to-first; flip the legend back so it lists the
    # groups in the same top-to-bottom order as they appear in each cluster.
    handles, labels = ax.get_legend_handles_labels()
    legend_bbox = (1.05, 1)
    ax.legend(handles[::-1], labels[::-1], title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.95, textstr, fontsize=14, color='green')

    # Show the plot
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
3. Grouped Bar Charts - 100% Distribution for Each Group
import textwrap
from textwrap import wrap

import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'

# textwrap is kept available for wrapping long legend titles, e.g.
# "\n".join(textwrap.wrap(title, 20)).


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped vertical bars labelled with counts and shares within each group."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Number of observations in each group; the percentages of one group's
    # bars add up to 100%.
    group_totals = data.sum(axis=1)

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(14, 7))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    x = np.arange(len(responses))
    for i, group_data in enumerate(data):
        bars = ax.bar(x + i * bar_width, group_data, bar_width,
                      alpha=opacity, color=colors[i], label=group_names[i], align='edge')
        # Add frequency and percentage annotations above each bar
        for bar in bars:
            height = bar.get_height()
            x_pos = bar.get_x() + bar.get_width() / 2
            ax.annotate('{:.2f}%'.format(100 * height / group_totals[i]), (x_pos, height), rotation=90, xytext=(0, 15),
                        textcoords="offset points", ha="center", va="bottom", color='red')
            ax.annotate(str(int(height)), (x_pos, height), rotation=0, xytext=(0, 7),
                        textcoords="offset points", ha="center", va="center", color='green')

    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title
    ax.set_xlabel('Responses')
    ax.set_ylabel('Frequency')
    ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution for Each Group', size=12, pad=40)

    # Bars are edge-aligned, so a cluster spans len(group_names) * bar_width
    # to the right of x; put the tick in the middle of that span.
    ax.set_xticks(x + len(group_names) * bar_width / 2)
    ax.set_xticklabels(responses)

    # Create a legend box at the upper-right edge of the plot area
    ax.legend(title='Legend', bbox_to_anchor=(0.96, 1.04), loc='upper left')
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.94, textstr, fontsize=14, color='green')
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
4. Horizontal Bar Charts: 100% Distribution for Each Group
import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped horizontal bars labelled with counts and shares within each group."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Number of observations in each group; the percentages of one group's
    # bars add up to 100%.
    group_totals = data.sum(axis=1)

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(10, 10))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    n_groups = len(group_names)
    x = np.arange(len(responses))
    # Horizontal clusters grow upwards, so draw the groups last-to-first: the
    # first group then sits at the top of each cluster. Counts, totals, colour
    # and label are all picked with the same group index so they stay aligned.
    for slot, g in enumerate(reversed(range(n_groups))):
        bars = ax.barh(x + slot * bar_width, data[g], bar_width,
                       alpha=opacity, color=colors[g], label=group_names[g])
        # Add frequency and percentage annotations at the end of each bar
        for bar in bars:
            width = bar.get_width()
            y_pos = bar.get_y() + bar.get_height() / 2
            ax.annotate('{:.2f}%'.format(100 * width / group_totals[g]), (width, y_pos), xytext=(12, 0),
                        textcoords="offset points", ha="left", va="center", color='red')
            ax.annotate(str(int(width)), (width, y_pos), rotation=0, xytext=(6, 0),
                        textcoords="offset points", ha="center", va="center", color='green')

    for side in ('right', 'left', 'top'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title
    ax.set_xlabel('Frequency')
    ax.set_ylabel('Responses')
    ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution for Each Group', size=12, pad=20)

    # Bars are centre-aligned, so the middle of a cluster is halfway between
    # the first and the last bar centre.
    ax.set_yticks(x + (n_groups - 1) * bar_width / 2)
    ax.set_yticklabels(responses)

    # Bars were added last-to-first; flip the legend back so it lists the
    # groups in the same top-to-bottom order as they appear in each cluster.
    handles, labels = ax.get_legend_handles_labels()
    legend_bbox = (1.05, 1)
    ax.legend(handles[::-1], labels[::-1], title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.95, textstr, fontsize=14, color='green')

    # Show the plot
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
5. Grouped Bar Charts: 100% Distribution for Each Response
import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped vertical bars showing each group's share of every response."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Number of observations per response; dividing by it makes the bars of
    # one response cluster add up to 1.
    response_totals = data.sum(axis=0)
    normalized_data = data / response_totals[np.newaxis, :]

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(14, 7))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    x = np.arange(len(responses))
    for i, group_data in enumerate(normalized_data):
        bars = ax.bar(x + i * bar_width, group_data, bar_width,
                      alpha=opacity, color=colors[i], label=group_names[i], align='edge')
        # Add frequency and percentage annotations above each bar
        for j, bar in enumerate(bars):
            height = bar.get_height()
            x_pos = bar.get_x() + bar.get_width() / 2
            # Take the count straight from the table: rebuilding it as
            # int(total * fraction) truncates whenever the product lands just
            # below the integer (1 / 49 * 49 is 0.9999999999999999).
            frequency = str(int(data[i, j]))
            ax.annotate('{:.2f}%'.format(100 * height), (x_pos, height), rotation=90, xytext=(0, 15),
                        textcoords="offset points", ha="center", va="bottom", color='red')
            ax.annotate(frequency, (x_pos, height), rotation=0, xytext=(0, 7),
                        textcoords="offset points", ha="center", va="center", color='blue')

    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title
    ax.set_xlabel('responses')
    ax.set_ylabel('Fraction')
    ax.set_title('Grouped Bar Charts: 100% Distribution for Each Response', size=12, pad=40)

    # Bars are edge-aligned, so a cluster spans len(group_names) * bar_width
    # to the right of x; put the tick in the middle of that span.
    ax.set_xticks(x + len(group_names) * bar_width / 2)
    ax.set_xticklabels(responses)

    # Create a legend box at the upper-right edge of the plot area
    ax.legend(title='Groups', bbox_to_anchor=(0.96, 1.04), loc='upper left')
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.94, textstr, fontsize=14, color='green')

    # Show the plot
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()
6. Horizontal Bar Charts Version
import numpy as np
import pandas as pd

# Credit line drawn in the top-left corner of the figure.
textstr = 'Created at \nwww.tssfl.com'


def tabulate_responses(groups, answers):
    """Count how often each response occurs within each group.

    Args:
        groups: sequence of group labels, one per observation.
        answers: sequence of responses, parallel to ``groups``.

    Returns:
        A tuple ``(group_labels, response_labels, counts)`` where
        ``counts[i, j]`` is the number of observations that belong to
        ``group_labels[i]`` and answered ``response_labels[j]``.
    """
    frame = pd.DataFrame({'Column1': groups, 'Column2': answers})
    table = pd.crosstab(frame['Column1'], frame['Column2'])
    # The cross-tabulation orders its rows and columns itself (sorted, not
    # first-seen), so the labels must be read back from the table; taking
    # them from the raw data would attach the wrong name to each bar.
    return table.index.to_numpy(), table.columns.to_numpy(), table.to_numpy()


def main():
    """Plot grouped horizontal bars showing each group's share of every response."""
    # Imported here so tabulate_responses stays usable without a plotting backend.
    import matplotlib.pyplot as plt

    # Create random survey data
    group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
    responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
    group_names, responses, data = tabulate_responses(group_names, responses)

    # Number of observations per response; dividing by it makes the bars of
    # one response cluster add up to 1.
    response_totals = data.sum(axis=0)
    normalized_data = data / response_totals[np.newaxis, :]

    # Plot grouped bar charts
    fig, ax = plt.subplots(figsize=(10, 10))
    bar_width = 0.1
    opacity = 0.8
    colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
    n_groups = len(group_names)
    x = np.arange(len(responses))
    # Horizontal clusters grow upwards, so draw the groups last-to-first: the
    # first group then sits at the top of each cluster. Shares, counts, colour
    # and label are all picked with the same group index so they stay aligned.
    for slot, g in enumerate(reversed(range(n_groups))):
        bars = ax.barh(x + slot * bar_width, normalized_data[g], bar_width,
                       alpha=opacity, color=colors[g], label=group_names[g])
        # Add frequency and percentage annotations at the end of each bar
        for j, bar in enumerate(bars):
            width = bar.get_width()
            y_pos = bar.get_y() + bar.get_height() / 2
            # Take the count straight from the table: rebuilding it as
            # int(total * fraction) truncates whenever the product lands just
            # below the integer (1 / 49 * 49 is 0.9999999999999999).
            frequency = str(int(data[g, j]))
            ax.annotate('{:.2f}%'.format(100 * width), (width, y_pos), xytext=(12, 0),
                        textcoords="offset points", ha="left", va="center", color='red')
            ax.annotate(frequency, (width, y_pos), rotation=0, xytext=(6, 0),
                        textcoords="offset points", ha="center", va="center", color='blue')

    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)

    # Set axis labels and title
    ax.set_xlabel('Fraction (Out of 1)')
    ax.set_ylabel('Responses')
    ax.set_title('Grouped Bar Charts: 100% Distribution for Each Response', size=12, pad=30)

    # Bars are centre-aligned, so the middle of a cluster is halfway between
    # the first and the last bar centre.
    ax.set_yticks(x + (n_groups - 1) * bar_width / 2)
    ax.set_yticklabels(responses)

    # Bars were added last-to-first; flip the legend back so it lists the
    # groups in the same top-to-bottom order as they appear in each cluster.
    handles, labels = ax.get_legend_handles_labels()
    legend_bbox = (1.05, 1)
    ax.legend(handles[::-1], labels[::-1], title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
    plt.subplots_adjust(right=0.9)  # Adjust the right margin to accommodate the legend

    # Add the text annotation outside the plot area
    fig.text(0.02, 0.95, textstr, fontsize=14, color='green')

    # Show the plot
    plt.tight_layout()
    plt.show()


if __name__ == "__main__":
    main()