###################################################################

# This file is unit08_histogramObject.py. It defines the class 
# "histogram" and contains tools for creating an hbook-style histogram.
# (hbook is a histogramming package developed at CERN.)

# It also houses various "methods" (functions defined in the class) to allow 
# you to do useful things.

# George Gollin, University of Illinois, March 26, 2017

import numpy as np

class histo:

    # hbookObject contains tools for creating an hbook-style
    # histogram.
    
    # use this way:
    #   import unit08_histogramObject as hb
    #
    #   histo1 = hb.histo('a 1-D histogram', 10, 0., 100.)
    #   histo2 = hb.histo('a scatterplot', 10, 0., 100., 15, 0., 4.)
    #
    #   histo1.hsetlabels('my x axis', 'my y axis');
    #
    #   histo1.hfill(the_xvalue);
    #   histo2.hfill(an_xvalue, a_yvalue);
    #
    #   histo1.hprint();
    #   histo2.hprint();
    #
    # for a logarithmic y scale use hprintlog instead of hprint:
    #   histo1.hprintlog();
    #   histo2.hprintlog();
    
    # Note that we want to modify the object in place, rather
    # than working with a copy: this is the ol' call-by-value vs.
    # call-by-reference issue.
    
    def __init__(self, title = "histogram title", nxbins = 10, xleft = 0., \
    xright = 100., nybins = 0, ybottom = 0., ytop = 0.):
        
        # import library
        import numpy as np

        # histogram title: assign the class to have the same value as the 
        # argument when instantiating an object of class histo.
        self.htitle = title

        # number of x bins
        self.nx = nxbins

        # number of y bins, if a scatter plot
        self.ny = nybins
        
        # xmin and xmax: lower edge of first bin, upper edge of last bin
        self.xmin = xleft
        self.xmax = xright

        # ymin and ymax: lower edge of first bin, upper edge of last bin if
        # this is a scatter plot
        self.ymin = ybottom
        self.ymax = ytop
        
        # histogram type: 1 for 1-D, 2 for scatter plot. determine this by the
        # number of y bins.
        if self.ny == 0:
            self.htype = 1
        else:
            self.htype = 2
            self.dy = (self.ymax - self.ymin) / self.ny

        # set the x bin width too
        self.dx = (self.xmax - self.xmin) / self.nx

        # x and y axis labels
        self.xlabel_hist = "x axis label"
        self.ylabel_hist = "y axis label"

        # total number of entries in this histogram, excluding
        # over/underflows
        self.ntot = 0

        # total number of entries in this histogram, including
        # over/underflows
        self.ntot_including_overflows = 0

        # make bin population arrays. integrated_binpop is the cumulative value
        # (running integral) for bin contents; I will normalize it so that it 
        # runs from near zero to 1.000.
        if self.htype == 1:
            
            # nx elements
            self.binpop = np.array([0] * self.nx)
            self.integrated_binpop = self.binpop
            
            # left edges of each of the bins. Setting the last argument to 
            # False means we do not include the right edge of the last bin in 
            # our list of values.
            self.bin_left_edge = np.linspace(self.xmin, self.xmax, self.nx, False )
            
        else:

            # ny rows and nx columns
            self.binpop = np.array([np.array([0] * self.nx)] * self.ny)
            self.integrated_binpop = self.binpop
            
            # left edges and bottom edges of each of the bins.
            self.bin_left_edge = np.linspace(self.xmin, self.xmax, self.nx, False )
            self.bin_bottom_edge = np.linspace(self.ymin, self.ymax, self.ny, False )
            
        # stuff relating to the integrated array so we can throw random numbers
        # flat between zero and one, and decide what value of the integrated
        # histogram's x axis corresponds to that random number.
        self.number_of_probability_bins_uniform_flat = 100
        self.probability_bin_width_uniform_flat = \
        1. / self.number_of_probability_bins_uniform_flat
        
        # integrated probability from zero to the right side of each bin.
        # if, for example, we have a 100 bin array, the bin width will
        # be 0.01 and the first bin will correspond to the 1% point in
        # the histogram. The 50th bin will be the histogram median; the
        # 100th bin will be the right side of the last histogram bin
        # holding anything. I am going to assume that distribution of
        # events inside a single histogram bin is smooth: perhaps flat,
        # or else linear.
        self.integrated_probability_to_here_uniform_flat = \
        np.linspace(self.probability_bin_width_uniform_flat, 1.000, \
        self.number_of_probability_bins_uniform_flat)
        
        self.corresponding_integrated_binpop_array_index = self.integrated_binpop

        # mean and RMS width in x and y
        self.xmean = 0.
        self.xrms = 0.
        self.ymean = 0.
        self.yrms = 0.

        # quantities used to calculate mean and RMS width for points inside
        # the plot boundaries
        self.xsum = 0.
        self.xsumsq = 0.
        self.ysum = 0.
        self.ysumsq = 0.
        
        # flag that ios sete when we have already called hintegrate for this 
        # histogram
        self.already_called_hintegrate = False

        # graphics window handle, set by hplot, and zero before then.
        self.handle = 0
   
    ###########################################################################
    # end of class constructor __init__
    ###########################################################################

    # here is the function to fill the histogram: call it every time we have another 
    # piece of data to accumulate in our histogram.

    def hfill(self, xvalue, yvalue = np.nan):
    
        # use this way:
        #   histo1 = hbookObject('a 1-D histogram', 10, 0., 100.);
        #   histo2 = hbookObject('a scatterplot', 10, 0., 100., ...
        #                                             15, 0., 4.);
        #
        #   histo1.hfill(the_xvalue);
        #   histo2.hfill(an_xvalue, a_yvalue);
    
        # increment a counter
        self.ntot_including_overflows = self.ntot_including_overflows + 1
    
        # calculate the x bin. Note that the first bin is bin zero.
        # do this hbook style, with bins referenced by their lower
        # edge. the "floor" function does a truncation
        # converting (a positive real) to an integer, kinda like Fortran
        if xvalue >= self.xmin and xvalue <= self.xmax:
            
            ixbin = np.floor((xvalue - self.xmin) / self.dx)
    
            # protect against landing exactly on the right edge
            if ixbin > self.nx - 1:
                ixbin = self.nx - 1
    
            # also calculate stuff used in eventual mean/RMS
            # determination
            self.xsum = self.xsum + xvalue
            self.xsumsq = self.xsumsq + xvalue * xvalue
    
        else:
            
            ixbin = -1
    
        # now do the y bin.
        if self.htype == 2 and yvalue >= self.ymin and yvalue <= self.ymax \
        and yvalue != np.nan:
            iybin = np.floor((yvalue - self.ymin) / self.dy)
    
            # protect against landing exactly on the right edge
            if iybin > self.ny - 1:
                iybin = self.ny - 1
    
            # also calculate stuff used in eventual mean/RMS
            # determination
            self.ysum = self.ysum + yvalue
            self.ysumsq = self.ysumsq + yvalue * yvalue
    
        else:
            iybin = -1
    
        # now increment the bin population.
        ixbin_WTF = int(ixbin)
        iybin_WTF = int(iybin)
        
        if self.htype == 1 and ixbin_WTF >= 0:
            self.binpop[ixbin_WTF] = self.binpop[ixbin_WTF] + 1
            self.ntot = self.ntot + 1
    
        if self.htype == 2 and ixbin_WTF >= 0 and iybin_WTF >= 0:
            self.binpop[ixbin_WTF, iybin_WTF] = self.binpop[ixbin_WTF, iybin_WTF] + 1
            self.ntot = self.ntot + 1
    
        # keep a running value of means and rms widths, so calculate
        # them now.
    
        if self.ntot > 0:
    
            self.xmean = self.xsum / self.ntot
            mean_square = self.xsumsq / self.ntot
            self.xrms = np.sqrt(np.abs(mean_square - self.xmean * self.xmean))
    
            if self.htype == 2:
    
                self.ymean = self.ysum / self.ntot
                mean_square = self.ysumsq / self.ntot
                self.yrms = np.sqrt(np.abs(mean_square - self.ymean * self.ymean))
           
    ###########################################################################
    # end of class function hfill
    ###########################################################################
    
    # here is a function to calculate a running integral of the bin
    # population in the histogram. This function also sets up arrays
    # which allow me to generate random numbers distributed
    # (approximately) according to this histogram.
    
    # this routine can only be called for a 1-D histogram.
    
    def hintegrate(self):
    
        if self.htype != 1:
            print("do not call hintegrate for a 2-D histogram.")
            return

        # now calculate a running integration of the histogram
        self.already_called_hintegrate = True
        running_sum = 0
    
        # note that the range function makes a semi-closed interval: it will 
        # include 0, but not self.nx. And since arrays are indexed starting
        # at zero, this is just what we want: the indices should go from 0 to
        # array_length - 1.
        for ixbin in range(0, self.nx):
            running_sum = running_sum + self.binpop[ixbin]
            self.integrated_binpop[ixbin] = running_sum
    
        # now normalize the integrated bin contents so we can use this
        # as a probability distribution. with python we can do all the array
        # elements in one shotm, so comment-out the matlab version.
        # for ixbin in range(0, self.nx):
        #     self.integrated_binpop[ixbin] = self.integrated_binpop[ixbin] / running_sum
        self.integrated_binpop = self.integrated_binpop / running_sum
      
        # now set up arrays I can use to generate random numbers
        # thrown according to the shape of this histogram. We'll be
        # working with two different arrays. One is a uniform, flat
        # distribution of probabilities that range from 0 to 1. The
        # other is the integrated probability distribution for the
        # histogram, namely the fraction of the events in the histogram
        # that occur from the left-most edge of the full histogram up
        # to the right edge of each bin.
    
        # in general, these arrays will have different numbers of
        # bins: the uniform/flat array will typically have 100 bins,
        # while the other will have the same number of bins as the
        # histogram under consideration. we want to associate a bin
        # in the uniform/flat array with a point along the histogram's
        # horizontal axis corresponding to that uniform/flat bin's value.
        # what I mean is this: say bin 50 in the uniform/flat array
        # corresponds to an integrated probability of 0.500. In that
        # case, it should point to the spot in the histogram that is
        # the median in the distribution.
    
        # now loop over bins in the uniform/flat array. Here's what
        # we're doing. Imagine that the uniform/flat array is 100
        # elements long. The first array element, which correspoinds to
        # 1% of the histogram population, will hold the value of the
        # histogram's x axis that corresponds to 1% of the integrated
        # histogram population.
    
        # we want to see which integrated probability bin first exceeds
        # the value of integrated_probability_to_here.
    
        previous_histo_bin = 0
    
        for ibin_uniform_flat in range(0, self.number_of_probability_bins_uniform_flat):
    
            probability_right_edge_uniform_flat = \
            self.integrated_probability_to_here_uniform_flat[ibin_uniform_flat]
    
            # look for the first bin in my histogram that has the
            # integral, up to the right edge of that bin, exceeding the
            # value of probability_right_edge_uniform_flat.
    
            for histo_bin in range(previous_histo_bin, self.nx):
    
                probability_right_edge_histo = self.integrated_binpop[histo_bin]
    
                if probability_right_edge_histo >= probability_right_edge_uniform_flat:
    
                    # found it! see how far from the left edge of this
                    # bin we need to go to get the approximate point on
                    # the histogram x axis.
    
                    if histo_bin > 0:
                        probability_left_edge_histo = self.integrated_binpop[histo_bin - 1]
                    else:
                        probability_left_edge_histo = 0
    
                    fraction_of_histo_bin = \
                    (probability_right_edge_uniform_flat - probability_left_edge_histo) / \
                    (probability_right_edge_histo - probability_left_edge_histo)
    
                    # get histogram x axis value now
    
                    histo_x_value = self.xmin + self.dx * (histo_bin - 1 + fraction_of_histo_bin)
    
                    self.corresponding_integrated_binpop_array_index[ibin_uniform_flat] = \
                    histo_x_value

                    # update a few things then jump out of this inner loop.
                    previous_histo_bin = histo_bin
    
                    # now break out of this loop.
                    break
    
    ###########################################################################
    # end of class function hintegrate
    ###########################################################################
    
    # here is a function to return a random number according to a 
    # distribution given approximately by the histogram.
    
    def hrandom(self):
    
        import random as ran

        # this routine can only be called for a 1-D histogram. 
        if self.htype != 1:
            print("do not call hrandom for a 2-D histogram.")
            return np.nan
    
        # check that we've already called hintegrate by checking the 
        # length of one of the arrays it fills. if not yet called, call 
        # it here.
    
        if not self.already_called_hintegrate:
            self.hintegrate()
    
        # get a random number, chosen flat from [0,1), then turn this
        # into a bin.
        flat_random = ran.random() / self.probability_bin_width_uniform_flat
    
        # now see where in my flat randoms array this falls. the matlab
        # floor function truncates to an integer.
    
        flat_random_bin = np.floor(flat_random)
        flat_random_bin_fraction = flat_random - flat_random_bin
        
        # we need to see where inside this particular bin the random number
        # falls so we can extrapolate across the bin.
    
        if flat_random_bin >= self.number_of_probability_bins_uniform_flat:
    
            # here if we've overshot things, going past the right end of the array
            TheRandom = self.xmax
    
        elif flat_random_bin == 0:
    
            # here if we are in the very first bin
            TheRandom = self.xmin + (self.corresponding_integrated_binpop_array_index[0] - self.xmin) * \
            flat_random_bin_fraction
    
        else:
    
            # here to extrapolate where inside this bin to assign the value.
            TheRandom = self.corresponding_integrated_binpop_array_index[flat_random_bin] + \
            (self.corresponding_integrated_binpop_array_index[flat_random_bin + 1] - \
            self.corresponding_integrated_binpop_array_index[flat_random_bin]) * \
            flat_random_bin_fraction
    
        return TheRandom
    
    ###########################################################################
    # end of class function hrandom
    ###########################################################################

    # here is a function to set the histogram's axis labels.
    
    def hsetlabels(self, xstring = "x axis label", ystring = ""):
    
        self.xlabel_hist = xstring
        self.ylabel_hist = ystring
    
    ###########################################################################
    # end of class function hsetlabels
    ###########################################################################
    
    # here is a function to display the histogram to the screen in a
    # new window, and load the "handle" to that window into the object.
    # we will either display the histogram all by itself in the window (when
    # the arguments are 0 or 1 or unspecified) or else as a sublot in a window
    # with the specified number of rowas and columns of independent histograms.
    # histyograms are numbered from left to right, from top row to bottom row.
    
    def hprint(self, rows = 0, columns = 0, which_pane = 0):
        
        # import library
        import matplotlib.pyplot as plt
    
        # tell whether plot goes by itself or not depending on the
        # number of arguments to this function. If solo, or the first
        # plot to be put in this window, open a new graphics window.
    
        if rows == 0 or columns == 0 or which_pane == 0:
            number_args = 1
        else:
            number_args = 4

        if number_args == 1 or which_pane == 1:
    
            # open a new window and title it. Also get the handle for
            # the newly opened window.    
            self.handle = plt.figure(self.htitle)
    
        if number_args == 4:
            
            # set the subplot. 321 indicates three rows, two columns of plots
            # with histogram to be placed in the first row, first column
            subplot_argument = 100 * rows + 10 * columns + which_pane
            plt.subplot(subplot_argument)
    
        # now draw it.
        self.hdraw(rows, columns, which_pane, self.handle)
        
    ###########################################################################
    # end of class function hprint
    ###########################################################################
    
    # here is a function to display the histogram to the screen in a
    # new window, and load the "handle" to that window into the object.
    # routine works by calling hprint, then changing the y axis to a
    # log scale.
    
    def hprintlog(self,  rows = 0, columns = 0, which_pane = 0):
        
        self.hprint(rows, columns, which_pane)

        # get the axes        
        ax = self.handle.gca()

        # make a logarithmic y scale
        ax.set_yscale("log")

    ###########################################################################
    # end of class function hprintlog
    ###########################################################################
        
    # here is a function to display the histogram to the screen in the currently
    # open plot window. It is called by hprint.
    
    def hdraw(self,  rows, columns, which_pane, figure_handle):
        
        import matplotlib.pyplot as plt
    
        # tell whether plot goes by itself or not depending on the
        # number of arguments to this function. If solo, or the first
        # plot to be put in this window, put the text box with means,
        # etc. in the upper left corner of the plot.
    
        if rows == 0 or columns == 0 or which_pane == 0:
            number_dimensions = 1
        else:
            number_dimensions = 2
    
        # now get graphics pointer/handle information for a textbox that will go into
        # the currently open window. The "annotation" reference opens the (empty)
        # textbox in the current graphics window. The format for the coordinate
        # information is [xleft ybottom xwidth ywidth], referenced to the full
        # width and height of the graphics portion of the window. (Menus are not
        # included in the width/height calculation.)
    
        # 1-D histogram:
    
        if self.htype == 1 or number_dimensions == 1:
    
            # get axis properties
            ax = figure_handle.gca()
            
            
            # left edges of bins: self.bin_left_edge
            # bin populations: self.binpop
            # we need to monkey around a little to get the last bin to display.
            bin_edges = \
            np.append(self.bin_left_edge, self.bin_left_edge[self.nx - 1] + self.dx)

            bin_contents = np.append(self.binpop, self.binpop[self.nx - 1])
            
            plt.step(bin_edges, bin_contents, where = "post")

            # label stuff
            plt.xlabel(self.xlabel_hist)
            plt.ylabel(self.ylabel_hist)
            plt.title(self.htitle)
            
            # now create a string that includes 5 digits past the decimal point for the
            # mean and 3 past the decimal for the RMS width. Note the exact placement of
            # percent signs, and so forth. 
            statistics_message = \
            "mean: %.5f  \nRMS = %.3f  \nN = %d" % (self.xmean, self.xrms, self.ntot)

            # add the statistics information string to the upper left corner of the plot.
            plt.text(0.02, 0.98, statistics_message, \
            horizontalalignment="left", verticalalignment="top", \
            transform=ax.transAxes, color="#550000", fontsize=12)  
            
            
        # 2-D histograms
            
        # I haven't implemented this yet.
            
    ###########################################################################
    # end of class function hdraw
    ###########################################################################