Chat With ChatGPT - An Interactive Conversational AI

User avatar
Eli
Senior Expert Member
Reactions: 189
Posts: 5931
Joined: 10 years ago
Location: Tanzania
Contact:

#21

Grouped Bar Charts from Pandas Dataframe


1. Grouped Bar Charts: 100% Distribution Across Groups

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4.  
  5. import textwrap
  6. from textwrap import wrap
  7. textstr = 'Created at \nwww.tssfl.com'
  8.  
  9. #Create a random dataframe
  10. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  11. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  12. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  13.  
  14.  
  15. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  16. df = pd.crosstab(df['Column1'], df['Column2'])
  17.  
  18. #Convert pandas dataframe to numpy array
  19. data = df.to_numpy()
  20.  
  21. #Calculate the total count for each group (row sums) and the grand total
  22. group_totals = np.sum(data, axis=1)
  23. total = np.sum(group_totals)
  24.  
  25. #Convert the list to a pandas Series
  26. group_series = pd.Series(group_names)
  27. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  28. group_names = group_series.unique()
  29.  
  30. response_series = pd.Series(responses)
  31. responses = response_series.unique()
  32.  
  33. #Plot grouped bar charts
  34. fig, ax = plt.subplots(figsize=(14,7))
  35. bar_width = 0.1
  36. opacity = 0.8
  37.  
  38. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  39.  
  40. for i, group_data in enumerate(data):
  41.     x = np.arange(len(responses))
  42.     bars = ax.bar(x + i * bar_width, group_data, bar_width,
  43.                   alpha=opacity, color=colors[i], label=group_names[i], align='edge')
  44.  
  45.     #Add percentage annotations for each bar
  46.     for j, bar in enumerate(bars):
  47.         percentage = '{:.2f}%'.format(100 * bar.get_height() / total)
  48.         frequency = str(int(bar.get_height()))
  49.         x_pos = bar.get_x() + bar.get_width() / 2
  50.         y_pos = bar.get_height()
  51.         ax.annotate(percentage, (x_pos, y_pos), rotation=90, xytext=(0, 15),
  52.                     textcoords="offset points", ha="center", va="bottom", color='red')
  53.         if y_pos >= 0:
  54.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(0, 7),
  55.                         textcoords="offset points", ha="center", va="center", color='green')
  56.  
  57. ax.spines['top'].set_visible(False)
  58. ax.spines['right'].set_visible(False)
  59. ax.spines['left'].set_visible(False)
  60. ax.spines['bottom'].set_visible(False)
  61. #Set axis labels and title
  62. ax.set_xlabel('Responses')
  63. ax.set_ylabel('Frequency')
  64. ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution Across Groups', size=12, pad=40)
  65.  
  66. #Set x-axis tick labels
  67. ax.set_xticks(x + (len(group_names) - 1) * bar_width / 2)
  68. ax.set_xticklabels(responses)
  69.  
  70. #Wrap column titles for legend, break long column titles after 20 characters for legend titles
  71. #legend_title = "\n".join(textwrap.wrap(df.columns[2], 20))
  72. #legend_title = "\n".join(textwrap.wrap(column2, 30))
  73.  
  74. #Create a legend box outside the plot area
  75. legend = plt.legend(title='Legend', bbox_to_anchor=(0.96, 1.04), loc='upper left')
  76. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  77.  
  78. #Add the text annotation outside the plot area
  79. #plt.figtext(0.1, 0.5, textstr, fontsize=10, verticalalignment='center')
  80. plt.gcf().text(0.02, 0.94, textstr, fontsize=14, color='green')
  81.  
  82. plt.tight_layout()
  83. plt.show()
  84.  
  85. """
  86. #Calculate the total number for each group in Column1
  87. group_counts = df['Column1'].value_counts()
  88.  
  89. #Convert the Series to a NumPy array
  90. group_totals = group_counts.values
  91. """


2. Horizontal Bar Charts

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. textstr = 'Created at \nwww.tssfl.com'
  5.  
  6. # Create a random dataframe
  7. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  8. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  9. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  10.  
  11. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  12. df = pd.crosstab(df['Column1'], df['Column2'])
  13.  
  14. #Convert pandas dataframe to numpy array
  15. data = df.to_numpy()
  16.  
  17. # Calculate the total count for each group (row sums) and the grand total
  18. group_totals = np.sum(data, axis=1)
  19. total = np.sum(group_totals)
  20.  
  21. #Convert the list to a pandas Series
  22. group_series = pd.Series(group_names)
  23. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  24. group_names = group_series.unique()
  25. group_names = group_names[::-1]  #Reverse the order of group labels
  26.  
  27. response_series = pd.Series(responses)
  28. responses = response_series.unique()
  29.  
  30. # Plot grouped bar charts
  31. fig, ax = plt.subplots(figsize=(10, 10))
  32. bar_width = 0.1
  33. opacity = 0.8
  34.  
  35. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  36. colors = colors[:len(group_names)][::-1]  # Reverse the order of colors and limit to the number of groups
  37.  
  38. for i, group_data in enumerate(data):
  39.     x = np.arange(len(responses))
  40.     bars = ax.barh(x + i * bar_width, group_data, bar_width,
  41.                    alpha=opacity, color=colors[i], label=group_names[i])
  42.  
  43.     # Add percentage annotations for each bar
  44.     for j, bar in enumerate(bars):
  45.         percentage = '{:.2f}%'.format(100 * bar.get_width() / total)
  46.         frequency = str(int(bar.get_width()))
  47.         y_pos = bar.get_y() + bar.get_height() / 2
  48.         x_pos = bar.get_width()
  49.         ax.annotate(percentage, (x_pos, y_pos), xytext=(12, 0),
  50.                     textcoords="offset points", ha="left", va="center", color='red')
  51.        
  52.         if x_pos >= 0:
  53.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(6, 0),
  54.                         textcoords="offset points", ha="center", va="center", color='green')
  55.        
  56.  
  57. ax.spines['right'].set_visible(False)
  58. ax.spines['left'].set_visible(False)
  59. ax.spines['top'].set_visible(False)
  60. # Set axis labels and title
  61. ax.set_xlabel('Frequency')
  62. ax.set_ylabel('Responses')
  63. ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution for Each Group', size=12, pad=20)
  64.  
  65. # Set y-axis tick labels
  66. ax.set_yticks(x + (len(group_names) - 1) * bar_width / 2)
  67. ax.set_yticklabels(responses)
  68.  
  69. #Set legend with correct ordering of colors and labels
  70. handles, labels = ax.get_legend_handles_labels()
  71. legend_bbox = bbox_to_anchor=(1.05, 1)
  72.  
  73. #Place the legend outside the plot area
  74. plt.gca().legend(handles[::-1], labels[::-1], title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
  75. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  76.  
  77. #Add the text annotation outside the plot area
  78. plt.gcf().text(0.02, 0.95, textstr, fontsize=14, color='green')
  79.  
  80. #Show the plot
  81. plt.tight_layout()
  82. plt.show()


3. Grouped Bar Charts - 100% Distribution for Each Group

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4.  
  5. import textwrap
  6. from textwrap import wrap
  7. textstr = 'Created at \nwww.tssfl.com'
  8.  
  9. #Create a random dataframe
  10. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  11. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  12. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  13.  
  14.  
  15. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  16. df = pd.crosstab(df['Column1'], df['Column2'])
  17.  
  18. #Convert pandas dataframe to numpy array
  19. data = df.to_numpy()
  20.  
  21. #Calculate the total count for each group (row sums)
  22. group_totals = np.sum(data, axis=1)
  23.  
  24. #Convert the list to a pandas Series
  25. group_series = pd.Series(group_names)
  26. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  27. group_names = group_series.unique()
  28.  
  29. response_series = pd.Series(responses)
  30. responses = response_series.unique()
  31.  
  32. #Plot grouped bar charts
  33. fig, ax = plt.subplots(figsize=(14,7))
  34. bar_width = 0.1
  35. opacity = 0.8
  36.  
  37. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  38.  
  39. for i, group_data in enumerate(data):
  40.     x = np.arange(len(responses))
  41.     bars = ax.bar(x + i * bar_width, group_data, bar_width,
  42.                   alpha=opacity, color=colors[i], label=group_names[i], align='edge')
  43.  
  44.     #Add percentage annotations for each bar
  45.     for j, bar in enumerate(bars):
  46.         percentage = '{:.2f}%'.format(100 * bar.get_height() / group_totals[i])
  47.         frequency = str(int(bar.get_height()))
  48.         x_pos = bar.get_x() + bar.get_width() / 2
  49.         y_pos = bar.get_height()
  50.         ax.annotate(percentage, (x_pos, y_pos), rotation=90, xytext=(0, 15),
  51.                     textcoords="offset points", ha="center", va="bottom", color='red')
  52.         if y_pos >= 0:
  53.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(0, 7),
  54.                         textcoords="offset points", ha="center", va="center", color='green')
  55.  
  56. ax.spines['top'].set_visible(False)
  57. ax.spines['right'].set_visible(False)
  58. ax.spines['left'].set_visible(False)
  59. ax.spines['bottom'].set_visible(False)
  60. #Set axis labels and title
  61. ax.set_xlabel('Responses')
  62. ax.set_ylabel('Frequency')
  63. ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution for Each Group', size=12, pad=40)
  64.  
  65. #Set x-axis tick labels
  66. ax.set_xticks(x + (len(group_names) - 1) * bar_width / 2)
  67. ax.set_xticklabels(responses)
  68.  
  69. #Wrap column titles for legend, break long column titles after 20 characters for legend titles
  70. #legend_title = "\n".join(textwrap.wrap(df.columns[2], 20))
  71. #legend_title = "\n".join(textwrap.wrap(column2, 30))
  72.  
  73. #Create a legend box outside the plot area
  74. legend = plt.legend(title='Legend', bbox_to_anchor=(0.96, 1.04), loc='upper left')
  75. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  76.  
  77. #Add the text annotation outside the plot area
  78. #plt.figtext(0.1, 0.5, textstr, fontsize=10, verticalalignment='center')
  79. plt.gcf().text(0.02, 0.94, textstr, fontsize=14, color='green')
  80.  
  81. plt.tight_layout()
  82. plt.show()


4. Horizontal Bar Charts

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. textstr = 'Created at \nwww.tssfl.com'
  5.  
  6. # Create a random dataframe
  7. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  8. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  9. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  10.  
  11. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  12. df = pd.crosstab(df['Column1'], df['Column2'])
  13.  
  14. #Convert pandas dataframe to numpy array
  15. data = df.to_numpy()
  16.  
  17. # Calculate the total count for each group (row sums)
  18. group_totals = np.sum(data, axis=1)
  19.  
  20. #Convert the list to a pandas Series
  21. group_series = pd.Series(group_names)
  22. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  23. group_names = group_series.unique()
  24. group_names = group_names[::-1]  #Reverse the order of group labels
  25.  
  26. response_series = pd.Series(responses)
  27. responses = response_series.unique()
  28.  
  29. # Plot grouped bar charts
  30. fig, ax = plt.subplots(figsize=(10, 10))
  31. bar_width = 0.1
  32. opacity = 0.8
  33.  
  34. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  35. colors = colors[:len(group_names)][::-1]  # Reverse the order of colors and limit to the number of groups
  36.  
  37. for i, group_data in enumerate(data):
  38.     x = np.arange(len(responses))
  39.     bars = ax.barh(x + i * bar_width, group_data, bar_width,
  40.                    alpha=opacity, color=colors[i], label=group_names[i])
  41.  
  42.     # Add percentage annotations for each bar
  43.     for j, bar in enumerate(bars):
  44.         percentage = '{:.2f}%'.format(100 * bar.get_width() / group_totals[i])
  45.         frequency = str(int(bar.get_width()))
  46.         y_pos = bar.get_y() + bar.get_height() / 2
  47.         x_pos = bar.get_width()
  48.         ax.annotate(percentage, (x_pos, y_pos), xytext=(12, 0),
  49.                     textcoords="offset points", ha="left", va="center", color='red')
  50.        
  51.         if x_pos >= 0:
  52.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(6, 0),
  53.                         textcoords="offset points", ha="center", va="center", color='green')
  54.        
  55.  
  56. ax.spines['right'].set_visible(False)
  57. ax.spines['left'].set_visible(False)
  58. ax.spines['top'].set_visible(False)
  59. # Set axis labels and title
  60. ax.set_xlabel('Frequency')
  61. ax.set_ylabel('Responses')
  62. ax.set_title('Grouped Bar Charts: Frequency and 100% Distribution for Each Group', size=12, pad=20)
  63.  
  64. # Set y-axis tick labels
  65. ax.set_yticks(x + (len(group_names) - 1) * bar_width / 2)
  66. ax.set_yticklabels(responses)
  67.  
  68. #Set legend with correct ordering of colors and labels
  69. handles, labels = ax.get_legend_handles_labels()
  70. legend_bbox = bbox_to_anchor=(1.05, 1)
  71.  
  72. #Place the legend outside the plot area
  73. plt.gca().legend(handles[::-1], labels[::-1], title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
  74. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  75.  
  76. #Add the text annotation outside the plot area
  77. plt.gcf().text(0.02, 0.95, textstr, fontsize=14, color='green')
  78.  
  79. #Show the plot
  80. plt.tight_layout()
  81. plt.show()


5. Grouped Bar Charts - 100% Distribution for Each Response

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. textstr = 'Created at \nwww.tssfl.com'
  5.  
  6. #Create a random dataframe
  7. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  8. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  9. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  10.  
  11.  
  12. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  13. df = pd.crosstab(df['Column1'], df['Column2'])
  14.  
  15. #Convert pandas dataframe to numpy array
  16. data = df.to_numpy()
  17.  
  18. #Calculate the total number for each response (column sums) and normalize each column by it
  19. response_totals = np.sum(data, axis=0)
  20. normalized_data = data / response_totals[np.newaxis, :]
  21.  
  22. #Convert the list to a pandas Series
  23. group_series = pd.Series(group_names)
  24. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  25. group_names = group_series.unique()
  26.  
  27. response_series = pd.Series(responses)
  28. responses = response_series.unique()
  29.  
  30. #Plot grouped bar charts
  31. fig, ax = plt.subplots(figsize=(14,7))
  32. bar_width = 0.1
  33. opacity = 0.8
  34.  
  35. #colors = ['#4286f4', '#f44174', '#f4d641', '#41f45e']
  36. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  37.  
  38. for i, group_data in enumerate(normalized_data):
  39.     x = np.arange(len(responses))
  40.     bars = ax.bar(x + i * bar_width, group_data, bar_width,
  41.                   alpha=opacity, color=colors[i], label=group_names[i], align='edge')
  42.  
  43.     #Add percentage annotations for each bar
  44.     for j, bar in enumerate(bars):
  45.         percentage = '{:.2f}%'.format(100 * bar.get_height())
  46.         frequency = str(int(response_totals[j]*bar.get_height()))
  47.         x_pos = bar.get_x() + bar.get_width() / 2
  48.         y_pos = bar.get_height()
  49.         ax.annotate(percentage, (x_pos, y_pos), rotation=90, xytext=(0, 15),
  50.                     textcoords="offset points", ha="center", va="bottom", color='red')
  51.        
  52.         if y_pos >= 0:
  53.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(0, 7),
  54.                         textcoords="offset points", ha="center", va="center", color='blue')
  55.  
  56. ax.spines['top'].set_visible(False)
  57. ax.spines['right'].set_visible(False)
  58. ax.spines['left'].set_visible(False)
  59. ax.spines['bottom'].set_visible(False)
  60. #Set axis labels and title
  61. ax.set_xlabel('responses')
  62. ax.set_ylabel('Fraction')
  63. ax.set_title('Grouped Bar Charts: 100% Distribution for Each Response', size=12, pad=40)
  64.  
  65. #Set x-axis tick labels
  66. ax.set_xticks(x + (len(group_names) - 1) * bar_width / 2)
  67. ax.set_xticklabels(responses)
  68.  
  69. #Create a legend box outside the plot area
  70. legend = plt.legend(title='Groups', bbox_to_anchor=(0.96, 1.04), loc='upper left')
  71. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  72.  
  73. #Add the text annotation outside the plot area
  74. plt.gcf().text(0.02, 0.94, textstr, fontsize=14, color='green')
  75.  
  76. #Show the plot
  77. plt.tight_layout()
  78. plt.show()


6. Horizontal Bar Charts Version

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. textstr = 'Created at \nwww.tssfl.com'
  5.  
  6. #Create a random dataframe
  7. group_names = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  8. responses = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  9. df = pd.DataFrame({'Column1': group_names, 'Column2': responses})
  10.  
  11. #Cross-tabulate the dataframe: rows are groups, columns are responses (pd.crosstab sorts both labels, so the original order is not preserved).
  12. df = pd.crosstab(df['Column1'], df['Column2'])
  13.  
  14. #Convert pandas dataframe to numpy array
  15. data = df.to_numpy()
  16.  
  17. #Calculate the total number for each response
  18. response_totals = np.sum(data, axis=0)
  19. normalized_data = data / response_totals[np.newaxis, :]
  20.  
  21. #Convert the list to a pandas Series
  22. group_series = pd.Series(group_names)
  23. #Extract unique values only - NumPy array in order of first appearance (this may differ from the sorted row order of the crosstab)
  24. group_names = group_series.unique()
  25.  
  26. response_series = pd.Series(responses)
  27. responses = response_series.unique()
  28.  
  29. #Plot grouped bar charts
  30. fig, ax = plt.subplots(figsize=(10, 10))
  31. bar_width = 0.1
  32. opacity = 0.8
  33.  
  34. colors = ['green', 'crimson', '#00FF00', "#FFD700", 'blue', '#4286f4', "#FF4500"]
  35. colors = colors[:len(group_names)][::-1]  # Reverse the order of colors and limit to the number of groups
  36.  
  37. for i, group_data in enumerate(normalized_data):
  38.     x = np.arange(len(responses))
  39.     bars = ax.barh(x + i * bar_width, group_data, bar_width,
  40.                    alpha=opacity, color=colors[i], label=group_names[i])
  41.  
  42.     # Add percentage annotations for each bar
  43.     for j, bar in enumerate(bars):
  44.         percentage = '{:.2f}%'.format(100 * bar.get_width())
  45.         frequency = str(int(response_totals[j]*bar.get_width()))
  46.         y_pos = bar.get_y() + bar.get_height() / 2
  47.         x_pos = bar.get_width()
  48.         ax.annotate(percentage, (x_pos, y_pos), xytext=(12, 0),
  49.                     textcoords="offset points", ha="left", va="center", color='red')
  50.        
  51.         if y_pos >= 0:
  52.             ax.annotate(frequency, (x_pos, y_pos), rotation=0, xytext=(6, 0),
  53.                         textcoords="offset points", ha="center", va="center", color='blue')
  54.            
  55. ax.spines['top'].set_visible(False)
  56. ax.spines['right'].set_visible(False)
  57. ax.spines['left'].set_visible(False)
  58. #Set axis labels and title
  59. ax.set_xlabel('Fraction (Out of 1)')
  60. ax.set_ylabel('Responses')
  61. ax.set_title('Grouped Bar Charts: 100% Distribution for Each Response', size=12, pad=30)
  62.  
  63. # Set y-axis tick labels
  64. ax.set_yticks(x + (len(group_names) - 1) * bar_width / 2)
  65. ax.set_yticklabels(responses)
  66.  
  67. #Set legend with correct ordering of colors and labels
  68. handles, labels = ax.get_legend_handles_labels()
  69. legend_bbox = bbox_to_anchor=(1.05, 1)
  70.  
  71. #Place the legend outside the plot area
  72. plt.gca().legend(reversed(handles), reversed(labels), title="Groups", loc='upper left', bbox_to_anchor=legend_bbox)
  73. plt.subplots_adjust(right=0.9)  #Adjust the right margin to accommodate the legend
  74.  
  75. #Add the text annotation outside the plot area
  76. plt.gcf().text(0.02, 0.95, textstr, fontsize=14, color='green')
  77.  
  78. #Show the plot
  79. plt.tight_layout()
  80. plt.show()

0
TSSFL -- A Creative Journey Towards Infinite Possibilities!
User avatar
Eli
Senior Expert Member
Reactions: 189
Posts: 5931
Joined: 10 years ago
Location: Tanzania
Contact:

#22

Stacked Bar Charts

Chart 1

  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4.  
  5. textstr = 'Created at \nwww.tssfl.com'
  6.  
  7. #Randomized string values for column1
  8. column1_values = np.random.choice(['agree', 'disagree', 'strongly agree', 'strongly disagree'], size=100)
  9.  
  10. #Randomized string values for column2
  11. column2_values = np.random.choice(['Group A', 'Group B', 'Group C', 'Group D', 'Group E', 'Group F', 'Group G'], size=100)
  12.  
  13. #Create the DataFrame
  14. df = pd.DataFrame({'Column1': column1_values, 'Column2': column2_values})
  15.  
  16. #Calculate the percentage distribution for each group
  17. grouped_counts = df.groupby(['Column2', 'Column1']).size()
  18. grouped_percentages = grouped_counts.groupby(level=0).apply(lambda x: 100 * x / x.sum())
  19.  
  20. #Reshape the DataFrame for plotting
  21. grouped_percentages = grouped_percentages.unstack()
  22.  
  23.  
  24. #Plotting the stacked bar charts (stacked=True)
  25. fig, ax = plt.subplots(figsize=(10, 6))
  26. ax = grouped_percentages.plot(kind='bar', stacked=True, ax=ax)
  27.  
  28. #Set the plot labels and title
  29. plt.xlabel('Groups')
  30. plt.ylabel('Percentage')
  31. plt.title('Grouped Bar Charts')
  32.  
  33. #Add a percentage label at the centre of each bar segment
  34. for p in ax.patches:
  35.     width = p.get_width()
  36.     height = p.get_height()
  37.     x, y = p.get_xy()
  38.     ax.text(x + width/2, y + height/2, '{:.1f}%'.format(height), ha='center')
  39.  
  40. #Modify x-axis labels
  41. x_labels = [label.get_text().split(',')[0].replace('(', '').strip() for label in ax.get_xticklabels()]
  42. ax.set_xticklabels(x_labels)
  43.  
  44. #Move the legend outside the plot
  45. lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
  46.  
  47. #Display the plot
  48. ax.spines['right'].set_visible(False)
  49. ax.spines['left'].set_visible(False)
  50. ax.spines['top'].set_visible(False)
  51. ax.spines['bottom'].set_visible(False)
  52. plt.tight_layout()
  53. plt.gcf().text(0.82, 0.88, textstr, fontsize=14, color='green')
  54. plt.show()


Chart 2

  1. import matplotlib.pyplot as plt
  2. import numpy as np
  3. textstr = 'Created at \nwww.tssfl.com'
  4.  
  5. #Create data
  6. data = np.array([[35, 25, 25, 15],
  7.               [20, 30, 30, 20],
  8.               [30, 20, 25, 25],
  9.                 [40, 50, 60, 70]])
  10.  
  11. #Create the stacked area chart (ax.stackplot stacks filled areas; it does not draw bars or normalize to percentages)
  12. fig, ax = plt.subplots()
  13. ax.stackplot(np.arange(4), data, labels=['A', 'B', 'C', 'D'])
  14.  
  15. #Increase the width and size of the plot
  16. fig.set_size_inches(10, 6)
  17. ax.set_xlabel('Type', fontsize=14)
  18. ax.set_ylabel('Count', fontsize=14)
  19. ax.set_title('Stacked Bar Chart', fontsize=16)
  20. ax.legend(fontsize=12)
  21.  
  22. #Move the legend outside the plot
  23. lgd = ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
  24. #Show the plot
  25. ax.spines['right'].set_visible(False)
  26. ax.spines['left'].set_visible(False)
  27. ax.spines['top'].set_visible(False)
  28. ax.spines['bottom'].set_visible(False)
  29. plt.gcf().text(0.11, 0.9, textstr, fontsize=14, color='green')
  30. plt.tight_layout()
  31. plt.show()

0
TSSFL -- A Creative Journey Towards Infinite Possibilities!
Leace2005
Member
Reactions: 1
Posts: 1
Joined: 1 month ago

#23

Great post! I wanted to add a couple of points to clarify the process a bit further.

When you're using the web interface, make sure your API key is kept secure. It's a good practice to avoid hardcoding your key directly into the code if possible. Instead, you could store it in environment variables or use a configuration file to retrieve it dynamically. This reduces the risk of exposing your key unintentionally, especially when working with public repositories or sharing your code.

As for the Python script approach, I noticed that you're using gpt-3.5-turbo, which is optimized for chat. Just a heads-up, if you're looking for even more advanced capabilities, you can switch to gpt-4 models, which generally offer more nuanced understanding and more detailed responses. However, keep in mind that gpt-4 may come with a higher cost depending on your usage, so it’s worth considering your project’s needs and budget.

Overall, this is a solid guide for getting started, and I hope this helps those looking to integrate ChatGPT into their TSSFL Technology Stack!
1
1 Image
I use the chat from the website in my work, namely for writing articles. It is useful software.
User avatar
Eli
Senior Expert Member
Reactions: 189
Posts: 5931
Joined: 10 years ago
Location: Tanzania
Contact:

#24

Side-by-side horizontal bar charts - comparing two things: one bar chart on the left, the other on the right. This simulation compares, for each question/statement, the frequency and percentage of responses for Pre- and Post-Intervention:


  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4. textstr = 'Created at \nwww.tssfl.com'
  5.  
  6. #Demo data
  7. data_pre = {
  8.     "Question 1": ["Agree", "Disagree", "Neutral", "Strongly Agree", "Strongly Disagree",
  9.                    "Neutral", "Disagree", "Agree", "Strongly Agree", "Strongly Disagree", "Agree", "Agree", "Neutral", "Disagree", "Strongly Agree"],
  10.     "Question 2": ["Disagree", "Neutral", "Strongly Agree", "Strongly Agree", "Strongly Disagree",
  11.                    "Neutral", "Neutral", "Disagree", "Agree", "Agree", "Strongly Agree", "Neutral", "Disagree", "Strongly Agree", "Agree"],
  12.     "Question 3": ["Strongly Disagree", "Strongly Agree", "Neutral", "Disagree", "Agree",
  13.                    "Strongly Disagree", "Strongly Agree", "Neutral", "Disagree", "Agree", "Neutral", "Strongly Disagree", "Strongly Agree", "Agree", "Disagree"],
  14. }
  15.  
  16. data_post = {
  17.     "Question 1": ["Neutral", "Strongly Agree", "Strongly Agree", "Strongly Agree", "Agree",
  18.                    "Neutral", "Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree", "Agree", "Neutral", "Neutral", "Strongly Agree"],
  19.     "Question 2": ["Strongly Agree", "Strongly Agree", "Agree", "Strongly Agree", "Agree",
  20.                    "Strongly Agree", "Neutral", "Neutral", "Neutral", "Strongly Agree", "Agree", "Neutral", "Strongly Agree", "Agree", "Strongly Agree"],
  21.     "Question 3": ["Agree", "Neutral", "Agree", "Strongly Agree", "Strongly Agree",
  22.                    "Agree", "Agree", "Neutral", "Disagree", "Neutral", "Agree", "Strongly Agree", "Neutral", "Strongly Agree", "Disagree"],
  23. }
  24.  
  25. df_pre = pd.DataFrame(data_pre)
  26. df_post = pd.DataFrame(data_post)
  27.  
  28.  
  29. def plot_side_by_side_barcharts(df_pre, df_post, pre_colors, post_colors):
  30.     """
  31. Generates side-by-side horizontal bar charts. Explicitly sets bar colors.
  32. """
  33.     response_scale = ["Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"]
  34.     questions = df_pre.columns
  35.     fontsize = 16  # Define fontsize here
  36.  
  37.     for question in questions:
  38.         pre_counts = df_pre[question].value_counts().reindex(response_scale, fill_value=0)
  39.         post_counts = df_post[question].value_counts().reindex(response_scale, fill_value=0)
  40.  
  41.         pre_percentages = (pre_counts / pre_counts.sum()) * 100
  42.         post_percentages = (post_counts / post_counts.sum()) * 100
  43.  
  44.         positions = np.arange(len(response_scale))
  45.         width = 0.44
  46.         offset = 0.25 / 2.54
  47.  
  48.         fig, ax = plt.subplots(figsize=(12, 8))
  49.  
  50.         pre_bars = ax.barh(positions + width / 2, pre_counts, width, label='Pre-Intervention', color=pre_colors)
  51.         post_bars = ax.barh(positions - width / 2, post_counts, width, label='Post-Intervention', color=post_colors)
  52.  
  53.         ax.set_yticks(positions)
  54.         ax.set_yticklabels(response_scale, fontsize=fontsize)  # Set fontsize for yticklabels
  55.         ax.set_title(question, fontsize=fontsize)  #Added fontsize for title
  56.         ax.set_xlabel("Frequency", fontsize=fontsize) #Added fontsize for xlabel
  57.         ax.legend(fontsize=fontsize) # Set fontsize for legend
  58.  
  59.  
  60.         ax.set_xlim(left=0)
  61.  
  62.         #Add labels with offset from the top of bars
  63.         for bar, counts, percs, x_offset, text_color in zip(pre_bars, pre_counts, pre_percentages, [-width / 2] * len(pre_counts),
  64.                                                              ['green'] * len(pre_counts)):
  65.             ax.text(bar.get_x() + bar.get_width() + offset, bar.get_y() + bar.get_height() / 2,
  66.                     f'{int(counts)}\n({percs:.1f}%)', ha='left', va='center', color=text_color, fontsize=fontsize)
  67.  
  68.         for bar, counts, percs, x_offset, text_color in zip(post_bars, post_counts, post_percentages, [width / 2] * len(post_counts),
  69.                                                              ['green'] * len(post_counts)):
  70.             ax.text(bar.get_x() + bar.get_width() + offset, bar.get_y() + bar.get_height() / 2,
  71.                     f'{int(counts)}\n({percs:.1f}%)', ha='left', va='center', color=text_color, fontsize=fontsize)
  72.        
  73.         #Improved Legend
  74.         handles, labels = ax.get_legend_handles_labels()
  75.         ax.legend(handles, labels, ncol=1, loc="lower left", bbox_to_anchor=(0, 1.06, 0.30, 0.08),
  76.                   borderaxespad=0, mode="expand", fontsize='16')
  77.  
  78.         ax.spines['right'].set_visible(False)
  79.         ax.spines['left'].set_visible(False)
  80.         ax.spines['top'].set_visible(False)
  81.         plt.gcf().text(0.04, 0.89, textstr, fontsize=14, color='green')
  82.         plt.tight_layout()
  83.         plt.show()
  84.  
  85.  
  86. #Example usage: Define colors explicitly
  87. pre_colors = ['#f4d641']
  88. post_colors = ['#4286f4']
  89. plot_side_by_side_barcharts(df_pre, df_post, pre_colors, post_colors)


Side-by-side horizontal bar charts - comparing two things: one bar chart on the left, the other on the right. This simulation covers all questions/statements at once, comparing the mean response score of each question/statement for Pre- and Post-Intervention:


  1. import pandas as pd
  2. import numpy as np
  3. import matplotlib.pyplot as plt
  4.  
  5. textstr = 'Created at \nwww.tssfl.com'
  6. plt.rcParams['xtick.labelsize'] = 16  #Set xtick labelsize globally
  7.  
  8.  
  9. class DataTransformer:
  10.     def __init__(self, response_categories=None):
  11.         self.response_categories = response_categories or [
  12.             "Strongly Disagree", "Disagree", "Somewhat Agree", "Agree", "Strongly Agree"
  13.         ]
  14.         self.score_mapping = {
  15.             "Strongly Disagree": 1,
  16.             "Disagree": 2,
  17.             "Somewhat Agree": 3,
  18.             "Agree": 4,
  19.             "Strongly Agree": 5
  20.         }
  21.  
  22.     def transform_data(self, data, column_range):
  23.         """Transforms input data to calculate mean scores and percentages."""
  24.         target_columns = data.loc[:, column_range[0]:column_range[1]]
  25.         #Clean up response strings
  26.         for col in target_columns.columns:
  27.             target_columns[col] = target_columns[col].str.strip()  #Remove leading/trailing spaces
  28.             target_columns[col] = target_columns[col].str.replace(r'\s+', ' ', regex=True)  #Replace multiple spaces with single space
  29.  
  30.         results = []
  31.         for col in target_columns.columns:
  32.             #Calculate mean score
  33.             scores = target_columns[col].map(self.score_mapping)
  34.             mean_score = scores.mean()
  35.  
  36.             #Calculate percentages
  37.             response_counts = target_columns[col].value_counts(normalize=True) * 100
  38.             response_percentages = {resp: response_counts.get(resp, 0) for resp in self.response_categories}
  39.             response_percentages["Mean Score"] = mean_score
  40.             response_percentages["Question"] = col
  41.             results.append(response_percentages)
  42.  
  43.         transformed_df = pd.DataFrame(results, columns=["Question"] + self.response_categories + ["Mean Score"])
  44.         return transformed_df
  45.  
  46.     def visualize_mean_scores(self, pre_transformed, post_transformed):
  47.         """Visualizes mean scores as grouped horizontal bar charts."""
  48.         pre_questions = pre_transformed["Question"]
  49.         post_questions = post_transformed["Question"]
  50.         pre_mean = pre_transformed["Mean Score"]
  51.         post_mean = post_transformed["Mean Score"]
  52.         fontsize = 16  #Define fontsize here
  53.  
  54.         assert list(pre_questions) == list(post_questions), "Questions in Pre and Post datasets do not match."
  55.  
  56.         #Reverse the order of questions for plotting
  57.         questions = list(pre_questions)[::-1]
  58.         pre_mean = list(pre_mean)[::-1]
  59.         post_mean = list(post_mean)[::-1]
  60.         n_questions = len(questions)
  61.  
  62.         y_positions = np.arange(n_questions)
  63.         bar_width = 0.4
  64.         offset = 0.25 / 2.54
  65.         text_color = 'green'
  66.  
  67.         fig, ax = plt.subplots(figsize=(12, 8))  #Increased figure size for better readability
  68.  
  69.         pre_bars = ax.barh(
  70.             y_positions + bar_width / 2, pre_mean, bar_width, label="Pre-Intervention", color='#f4d641'
  71.         )
  72.         post_bars = ax.barh(
  73.             y_positions - bar_width / 2, post_mean, bar_width, label="Post-Intervention", color='#4286f4'
  74.         )
  75.  
  76.         #Add mean scores on top of the bars with improved formatting
  77.         for bar, mean in zip(pre_bars, pre_mean):
  78.             ax.text(
  79.                 bar.get_width() + offset,  #Adjust x-position for better visibility
  80.                 bar.get_y() + bar.get_height() / 2,
  81.                 f"{mean:.2f}",  #Display with 2 decimal places
  82.                 va="center", ha="left", color=text_color, fontsize=fontsize
  83.             )
  84.  
  85.         for bar, mean in zip(post_bars, post_mean):
  86.             ax.text(
  87.                 bar.get_width() + offset,
  88.                 bar.get_y() + bar.get_height() / 2,
  89.                 f"{mean:.2f}",
  90.                 va="center", ha="left", color=text_color, fontsize=fontsize
  91.             )
  92.  
  93.         #Improved Legend
  94.         handles, labels = ax.get_legend_handles_labels()
  95.         ax.legend(handles, labels, ncol=1, loc="lower left", bbox_to_anchor=(0, 1.06, 0.24, 0.08),
  96.                   borderaxespad=0, mode="expand", fontsize=fontsize)
  97.  
  98.         ax.set_yticks(y_positions)
  99.         ax.set_yticklabels(questions, fontsize=fontsize)
  100.         ax.set_xlabel("Mean Score", fontsize=fontsize)
  101.         ax.set_title("Comparison of Mean Scores (Pre- vs. Post-Intervention)", fontsize=fontsize)
  102.         #ax.grid(axis="x", linestyle="--", alpha=0.7)
  103.         ax.spines['right'].set_visible(False)
  104.         ax.spines['left'].set_visible(False)
  105.         ax.spines['top'].set_visible(False)
  106.         plt.gcf().text(0.8, 0.94, textstr, fontsize=14, color='green')
  107.         plt.tight_layout()
  108.         plt.show()
  109.  
  110.     def create_table(self, transformed_data):
  111.         """Generates a nicely formatted table."""
  112.         return transformed_data.set_index("Question").T
  113.  
  114.  
  115. #Generate more realistic mock data
  116. np.random.seed(42)  # for reproducibility
  117. num_questions = 5
  118. num_respondents_pre = 62
  119. num_respondents_post = 59
  120.  
  121. pre_data = pd.DataFrame({
  122.     f"Q{i + 1}": np.random.choice(list(DataTransformer().score_mapping.keys()), num_respondents_pre)
  123.     for i in range(num_questions)
  124. })
  125.  
  126. post_data = pd.DataFrame({
  127.     f"Q{i + 1}": np.random.choice(list(DataTransformer().score_mapping.keys()), num_respondents_post)
  128.     for i in range(num_questions)
  129. })
  130.  
  131. #Ensure the columns in the Pre and Post datasets match
  132. if list(pre_data.columns) != list(post_data.columns):
  133.     print("Warning: The questions in the Pre and Post datasets do not match!")
  134. else:
  135.     print("The questions in the Pre and Post datasets match correctly.")
  136.  
  137. transformer = DataTransformer()
  138. pre_transformed = transformer.transform_data(pre_data, column_range=("Q1", "Q5"))
  139. post_transformed = transformer.transform_data(post_data, column_range=("Q1", "Q5"))
  140.  
  141. print("Pre-Intervention Data:")
  142. print(transformer.create_table(pre_transformed).T)
  143. print("\nPost-Intervention Data:")
  144. print(transformer.create_table(post_transformed).T)
  145.  
  146. transformer.visualize_mean_scores(pre_transformed, post_transformed)


Horizontal bar charts with opposite x-axis directions (pre-intervention counts grow to the left, post-intervention counts grow to the right):

  1. import pandas as pd
  2. import matplotlib.pyplot as plt
  3. import numpy as np
  4.  
  5. # --- Demo Data ---
  6. np.random.seed(42)  # for reproducibility
  7.  
  8. data_pre = {
  9.     'Q1': np.random.choice([1, 2, 3, 4, 5], size=20),
  10.     'Q2': np.random.choice([1, 2, 3, 4, 5], size=20),
  11.     'Q3': np.random.choice([1, 2, 3, 4, 5], size=20),
  12.     'Q4': np.random.choice([1, 2, 3, 4, 5], size=20)
  13.  
  14. }
  15. data_post = {
  16.     'Q1': np.random.choice([1, 2, 3, 4, 5], size=20),
  17.     'Q2': np.random.choice([1, 2, 3, 4, 5], size=20),
  18.     'Q3': np.random.choice([1, 2, 3, 4, 5], size=20),
  19.     'Q4': np.random.choice([1, 2, 3, 4, 5], size=20)
  20. }
  21.  
  22. df_pre = pd.DataFrame(data_pre)
  23. df_post = pd.DataFrame(data_post)
  24.  
  25.  
  26. # --- Function to create and display charts ---
  27. def create_comparison_bar_charts(df_pre, df_post):
  28.     """
  29.    Generates horizontal bar charts comparing pre- and post-intervention data.
  30.  
  31.    Args:
  32.        df_pre: Pandas DataFrame with pre-intervention data.
  33.        df_post: Pandas DataFrame with post-intervention data.
  34.    """
  35.  
  36.     num_questions = len(df_pre.columns)
  37.     fig, axes = plt.subplots(num_questions, 2, figsize=(12, 4 * num_questions),sharey = True) # Adjust figsize as needed
  38.  
  39.  
  40.     for i, question in enumerate(df_pre.columns):
  41.         #Handle potential missing values by filling with 0 or mean
  42.         pre_counts = df_pre[question].value_counts().reindex(range(1,6), fill_value=0)
  43.         post_counts = df_post[question].value_counts().reindex(range(1,6), fill_value=0)
  44.  
  45.         axes[i, 0].barh(pre_counts.index, pre_counts.values, align='center', label='Pre-Intervention')
  46.         axes[i, 1].barh(post_counts.index, post_counts.values, align='center', label='Post-Intervention')
  47.  
  48.         axes[i, 0].set_title(f'Pre-Intervention: {question}')
  49.         axes[i, 1].set_title(f'Post-Intervention: {question}')
  50.         axes[i, 0].invert_xaxis() #Inverts the x axis for better visual comparison
  51.         axes[i, 0].set_xlim(pre_counts.max()*1.1,0) #Adjust x limit
  52.         axes[i, 1].set_xlim(0,post_counts.max()*1.1)
  53.         axes[i,0].set_xlabel("Count")
  54.         axes[i,1].set_xlabel("Count")
  55.  
  56.     plt.tight_layout()  # Adjust layout to prevent overlap
  57.     plt.show()
  58.  
  59.  
  60. # --- Run the function ---
  61. create_comparison_bar_charts(df_pre, df_post)

0
TSSFL -- A Creative Journey Towards Infinite Possibilities!
Post Reply
  • Similar Topics
    Replies
    Views
    Last post

Return to “Technologies for Teaching, Learning, Research, Problem Solving and Business”

  • Information
  • Who is online

    Users browsing this forum: No registered users and 0 guests