# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.14.1
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# # Index of ML Operations<a id='top_phases'></a>
# <ul>
# <ul><li><details><summary><h2>Imported Libraries</h2></summary>
# <ul>
#
# <li><b>datetime</b></li>
# <li><b>keras</b></li>
# <li><b>matplotlib</b></li>
# <li><b>numpy</b></li>
# <li><b>pandas</b></li>
# <li><b>plotly</b></li>
# <li><b>random</b></li>
# <li><b>scipy</b></li>
# <li><b>seaborn</b></li>
# <li><b>sklearn</b></li>
# <li><b>statsmodels</b></li>
# <li><b>warnings</b></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Visualization</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Visualization" Calls</u></b></summary>
# <ul>
#
# <li> <b>matplotlib</b>
# <ul>
# <li>
# <details><summary><u>matplotlib.pyplot.figure</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Create a new figure, or activate an existing figure.
#
# Parameters
# ----------
# num : int or str or `.Figure`, optional
#     A unique identifier for the figure.
#
#     If a figure with that identifier already exists, this figure is made
#     active and returned. An integer refers to the ``Figure.number``
#     attribute, a string refers to the figure label.
#
#     If there is no figure with the identifier or *num* is not given, a new
#     figure is created, made active and returned.  If *num* is an int, it
#     will be used for the ``Figure.number`` attribute, otherwise, an
#     auto-generated integer value is used (starting at 1 and incremented
#     for each new figure). If *num* is a string, the figure label and the
#     window title is set to this value.
#
# figsize : (float, float), default: :rc:`figure.figsize`
#     Width, height in inches.
#
# dpi : float, default: :rc:`figure.dpi`
#     The resolution of the figure in dots-per-inch.
#
# facecolor : color, default: :rc:`figure.facecolor`
#     The background color.
#
# edgecolor : color, default: :rc:`figure.edgecolor`
#     The border color.
#
# frameon : bool, default: True
#     If False, suppress drawing the figure frame.
#
# FigureClass : subclass of `~matplotlib.figure.Figure`
#     Optionally use a custom `.Figure` instance.
#
# clear : bool, default: False
#     If True and the figure already exists, then it is cleared.
#
# tight_layout : bool or dict, default: :rc:`figure.autolayout`
#     If ``False`` use *subplotpars*. If ``True`` adjust subplot
#     parameters using `.tight_layout` with default padding.
#     When providing a dict containing the keys ``pad``, ``w_pad``,
#     ``h_pad``, and ``rect``, the default `.tight_layout` paddings
#     will be overridden.
#
# constrained_layout : bool, default: :rc:`figure.constrained_layout.use`
#     If ``True`` use constrained layout to adjust positioning of plot
#     elements.  Like ``tight_layout``, but designed to be more
#     flexible.  See
#     :doc:`/tutorials/intermediate/constrainedlayout_guide`
#     for examples.  (Note: does not work with `add_subplot` or
#     `~.pyplot.subplot2grid`.)
#
#
# **kwargs : optional
#     See `~.matplotlib.figure.Figure` for other possible arguments.
#
# Returns
# -------
# `~matplotlib.figure.Figure`
#     The `.Figure` instance returned will also be passed to
#     new_figure_manager in the backends, which allows hooking custom
#     `.Figure` classes into the pyplot interface. Additional kwargs will be
#     passed to the `.Figure` init function.
#
# Notes
# -----
# If you are creating many figures, make sure you explicitly call
# `.pyplot.close` on the figures you are not using, because this will
# enable pyplot to properly clean up the memory.
#
# `~matplotlib.rcParams` defines the default values, which can be modified
# in the matplotlibrc file.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.title</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Set a title for the Axes.
#
# Set one of the three available Axes titles. The available titles
# are positioned above the Axes in the center, flush with the left
# edge, and flush with the right edge.
#
# Parameters
# ----------
# label : str
#     Text to use for the title
#
# fontdict : dict
#     A dictionary controlling the appearance of the title text,
#     the default *fontdict* is::
#
#        {'fontsize': rcParams['axes.titlesize'],
#         'fontweight': rcParams['axes.titleweight'],
#         'color': rcParams['axes.titlecolor'],
#         'verticalalignment': 'baseline',
#         'horizontalalignment': loc}
#
# loc : {'center', 'left', 'right'}, default: :rc:`axes.titlelocation`
#     Which title to set.
#
# y : float, default: :rc:`axes.titley`
#     Vertical Axes location for the title (1.0 is the top).  If
#     None (the default) and :rc:`axes.titley` is also None, y is
#     determined automatically to avoid decorators on the Axes.
#
# pad : float, default: :rc:`axes.titlepad`
#     The offset of the title from the top of the Axes, in points.
#
# Returns
# -------
# `.Text`
#     The matplotlib text instance representing the title
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     Other keyword arguments are text properties, see `.Text` for a list
#     of valid text properties.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.ylabel</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Set the label for the y-axis.
#
# Parameters
# ----------
# ylabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'bottom', 'center', 'top'}, default: :rc:`yaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *y* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.xlabel</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Set the label for the x-axis.
#
# Parameters
# ----------
# xlabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'left', 'center', 'right'}, default: :rc:`xaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *x* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.show</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Display all open figures.
#
# Parameters
# ----------
# block : bool, optional
#     Whether to wait for all figures to be closed before returning.
#
#     If `True` block and run the GUI main loop until all figure windows
#     are closed.
#
#     If `False` ensure that all figure windows are displayed and return
#     immediately.  In this case, you are responsible for ensuring
#     that the event loop is running to have responsive figures.
#
#     Defaults to True in non-interactive mode and to False in interactive
#     mode (see `.pyplot.isinteractive`).
#
# See Also
# --------
# ion : Enable interactive mode, which shows / updates the figure after
#       every plotting command, so that calling ``show()`` is not necessary.
# ioff : Disable interactive mode.
# savefig : Save the figure to an image file instead of showing it on screen.
#
# Notes
# -----
# **Saving figures to file and showing a window at the same time**
#
# If you want an image file as well as a user interface window, use
# `.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)
# ``show()`` the figure is closed and thus unregistered from pyplot. Calling
# `.pyplot.savefig` afterwards would save a new and thus empty figure. This
# limitation of command order does not apply if the show is non-blocking or
# if you keep a reference to the figure and use `.Figure.savefig`.
#
# **Auto-show in jupyter notebooks**
#
# The jupyter backends (activated via ``%matplotlib inline``,
# ``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at
# the end of every cell by default. Thus, you usually don't have to call it
# explicitly there.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>seaborn</b>
# <ul>
# <li>
# <details><summary><u>seaborn.categorical.barplot</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Show point estimates and confidence intervals as rectangular bars.
#
# A bar plot represents an estimate of central tendency for a numeric
# variable with the height of each rectangle and provides some indication of
# the uncertainty around that estimate using error bars. Bar plots include 0
# in the quantitative axis range, and they are a good choice when 0 is a
# meaningful value for the quantitative variable, and you want to make
# comparisons against it.
#
# For datasets where 0 is not a meaningful value, a point plot will allow you
# to focus on differences between levels of one or more categorical
# variables.
#
# It is also important to keep in mind that a bar plot shows only the mean
# (or other estimator) value, but in many cases it may be more informative to
# show the distribution of values at each level of the categorical variables.
# In that case, other approaches such as a box or violin plot may be more
# appropriate.
#
#
# Input data can be passed in a variety of formats, including:
#
# - Vectors of data represented as lists, numpy arrays, or pandas Series
#   objects passed directly to the ``x``, ``y``, and/or ``hue`` parameters.
# - A "long-form" DataFrame, in which case the ``x``, ``y``, and ``hue``
#   variables will determine how the data are plotted.
# - A "wide-form" DataFrame, such that each numeric column will be plotted.
# - An array or list of vectors.
#
# In most cases, it is possible to use numpy or Python objects, but pandas
# objects are preferable because the associated names will be used to
# annotate the axes. Additionally, you can use Categorical types for the
# grouping variables to control the order of plot elements.    
#
# This function always treats one of the variables as categorical and
# draws data at ordinal positions (0, 1, ... n) on the relevant axis, even
# when the data has a numeric or date type.
#
# See the :ref:`tutorial <categorical_tutorial>` for more information.    
#
# Parameters
# ----------
# x, y, hue : names of variables in ``data`` or vector data, optional
#     Inputs for plotting long-form data. See examples for interpretation.        
# data : DataFrame, array, or list of arrays, optional
#     Dataset for plotting. If ``x`` and ``y`` are absent, this is
#     interpreted as wide-form. Otherwise it is expected to be long-form.    
# order, hue_order : lists of strings, optional
#     Order to plot the categorical levels in, otherwise the levels are
#     inferred from the data objects.        
# estimator : callable that maps vector -> scalar, optional
#     Statistical function to estimate within each categorical bin.
# ci : float or "sd" or None, optional
#     Size of confidence intervals to draw around estimated values.  If
#     "sd", skip bootstrapping and draw the standard deviation of the
#     observations. If ``None``, no bootstrapping will be performed, and
#     error bars will not be drawn.
# n_boot : int, optional
#     Number of bootstrap iterations to use when computing confidence
#     intervals.
# units : name of variable in ``data`` or vector data, optional
#     Identifier of sampling units, which will be used to perform a
#     multilevel bootstrap and account for repeated measures design.
# seed : int, numpy.random.Generator, or numpy.random.RandomState, optional
#     Seed or random number generator for reproducible bootstrapping.    
# orient : "v" | "h", optional
#     Orientation of the plot (vertical or horizontal). This is usually
#     inferred based on the type of the input variables, but it can be used
#     to resolve ambiguity when both `x` and `y` are numeric or when
#     plotting wide-form data.    
# color : matplotlib color, optional
#     Color for all of the elements, or seed for a gradient palette.    
# palette : palette name, list, or dict
#     Colors to use for the different levels of the ``hue`` variable. Should
#     be something that can be interpreted by :func:`color_palette`, or a
#     dictionary mapping hue levels to matplotlib colors.    
# saturation : float, optional
#     Proportion of the original saturation to draw colors at. Large patches
#     often look better with slightly desaturated colors, but set this to
#     ``1`` if you want the plot colors to perfectly match the input color
#     spec.    
# errcolor : matplotlib color
#     Color for the lines that represent the confidence interval.
# errwidth : float, optional
#     Thickness of error bar lines (and caps).         
# capsize : float, optional
#     Width of the "caps" on error bars.
#
# dodge : bool, optional
#     When hue nesting is used, whether elements should be shifted along the
#     categorical axis.    
# ax : matplotlib Axes, optional
#     Axes object to draw the plot onto, otherwise uses the current Axes.    
# kwargs : key, value mappings
#     Other keyword arguments are passed through to
#     :meth:`matplotlib.axes.Axes.bar`.
#
# Returns
# -------
# ax : matplotlib Axes
#     Returns the Axes object with the plot drawn onto it.    
#
# See Also
# --------
# countplot : Show the counts of observations in each categorical bin.    
# pointplot : Show point estimates and confidence intervals using scatterplot
#             glyphs.    
# catplot : Combine a categorical plot with a :class:`FacetGrid`.    
#
# Examples
# --------
#
# Draw a set of vertical bar plots grouped by a categorical variable:
#
# .. plot::
#     :context: close-figs
#
#     >>> import seaborn as sns
#     >>> sns.set_theme(style="whitegrid")
#     >>> tips = sns.load_dataset("tips")
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips)
#
# Draw a set of vertical bars with nested grouping by two variables:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips)
#
# Draw a set of horizontal bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="tip", y="day", data=tips)
#
# Control bar order by passing an explicit order:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="time", y="tip", data=tips,
#     ...                  order=["Dinner", "Lunch"])
#
# Use median as the estimate of central tendency:
#
# .. plot::
#     :context: close-figs
#
#     >>> from numpy import median
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, estimator=median)
#
# Show the standard error of the mean with the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci=68)
#
# Show standard deviation of observations instead of a confidence interval:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci="sd")
#
# Add "caps" to the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, capsize=.2)
#
# Use a different color palette for the bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  palette="Blues_d")
#
# Use ``hue`` without changing bar position or width:
#
# .. plot::
#     :context: close-figs
#
#     >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="weekend",
#     ...                  data=tips, dodge=False)
#
# Plot all bars in a single color:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  color="salmon", saturation=.5)
#
# Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style.
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips,
#     ...                  linewidth=2.5, facecolor=(1, 1, 1, 0),
#     ...                  errcolor=".2", edgecolor=".2")
#
# Use :func:`catplot` to combine a :func:`barplot` and a :class:`FacetGrid`.
# This allows grouping within additional categorical variables. Using
# :func:`catplot` is safer than using :class:`FacetGrid` directly, as it
# ensures synchronization of variable order across facets:
#
# .. plot::
#     :context: close-figs
#
#     >>> g = sns.catplot(x="sex", y="total_bill",
#     ...                 hue="smoker", col="time",
#     ...                 data=tips, kind="bar",
#     ...                 height=4, aspect=.7);
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>plotly</b>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'title': 'Montly Sales Diff'}</li></ul>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner
#                             of the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 5</u></h3></summary><small><a href='#5'>goto cell # 5</a></small>
# <ul>
#
# <li> <b>matplotlib</b>
# <ul>
# <li>
# <details><summary><u>matplotlib.pyplot.figure</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Create a new figure, or activate an existing figure.
#
# Parameters
# ----------
# num : int or str or `.Figure`, optional
#     A unique identifier for the figure.
#
#     If a figure with that identifier already exists, this figure is made
#     active and returned. An integer refers to the ``Figure.number``
#     attribute, a string refers to the figure label.
#
#     If there is no figure with the identifier or *num* is not given, a new
#     figure is created, made active and returned.  If *num* is an int, it
#     will be used for the ``Figure.number`` attribute, otherwise, an
#     auto-generated integer value is used (starting at 1 and incremented
#     for each new figure). If *num* is a string, the figure label and the
#     window title is set to this value.
#
# figsize : (float, float), default: :rc:`figure.figsize`
#     Width, height in inches.
#
# dpi : float, default: :rc:`figure.dpi`
#     The resolution of the figure in dots-per-inch.
#
# facecolor : color, default: :rc:`figure.facecolor`
#     The background color.
#
# edgecolor : color, default: :rc:`figure.edgecolor`
#     The border color.
#
# frameon : bool, default: True
#     If False, suppress drawing the figure frame.
#
# FigureClass : subclass of `~matplotlib.figure.Figure`
#     Optionally use a custom `.Figure` instance.
#
# clear : bool, default: False
#     If True and the figure already exists, then it is cleared.
#
# tight_layout : bool or dict, default: :rc:`figure.autolayout`
#     If ``False`` use *subplotpars*. If ``True`` adjust subplot
#     parameters using `.tight_layout` with default padding.
#     When providing a dict containing the keys ``pad``, ``w_pad``,
#     ``h_pad``, and ``rect``, the default `.tight_layout` paddings
#     will be overridden.
#
# constrained_layout : bool, default: :rc:`figure.constrained_layout.use`
#     If ``True`` use constrained layout to adjust positioning of plot
#     elements.  Like ``tight_layout``, but designed to be more
#     flexible.  See
#     :doc:`/tutorials/intermediate/constrainedlayout_guide`
#     for examples.  (Note: does not work with `add_subplot` or
#     `~.pyplot.subplot2grid`.)
#
#
# **kwargs : optional
#     See `~.matplotlib.figure.Figure` for other possible arguments.
#
# Returns
# -------
# `~matplotlib.figure.Figure`
#     The `.Figure` instance returned will also be passed to
#     new_figure_manager in the backends, which allows hooking custom
#     `.Figure` classes into the pyplot interface. Additional kwargs will be
#     passed to the `.Figure` init function.
#
# Notes
# -----
# If you are creating many figures, make sure you explicitly call
# `.pyplot.close` on the figures you are not using, because this will
# enable pyplot to properly clean up the memory.
#
# `~matplotlib.rcParams` defines the default values, which can be modified
# in the matplotlibrc file.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.title</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['Items per Category'] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Set a title for the Axes.
#
# Set one of the three available Axes titles. The available titles
# are positioned above the Axes in the center, flush with the left
# edge, and flush with the right edge.
#
# Parameters
# ----------
# label : str
#     Text to use for the title
#
# fontdict : dict
#     A dictionary controlling the appearance of the title text,
#     the default *fontdict* is::
#
#        {'fontsize': rcParams['axes.titlesize'],
#         'fontweight': rcParams['axes.titleweight'],
#         'color': rcParams['axes.titlecolor'],
#         'verticalalignment': 'baseline',
#         'horizontalalignment': loc}
#
# loc : {'center', 'left', 'right'}, default: :rc:`axes.titlelocation`
#     Which title to set.
#
# y : float, default: :rc:`axes.titley`
#     Vertical Axes location for the title (1.0 is the top).  If
#     None (the default) and :rc:`axes.titley` is also None, y is
#     determined automatically to avoid decorators on the Axes.
#
# pad : float, default: :rc:`axes.titlepad`
#     The offset of the title from the top of the Axes, in points.
#
# Returns
# -------
# `.Text`
#     The matplotlib text instance representing the title
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     Other keyword arguments are text properties, see `.Text` for a list
#     of valid text properties.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.ylabel</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['# of items'] | <b>Kwargs:</b> {'fontsize': 12}</li></ul>
# <blockquote>
# <code>
# Set the label for the y-axis.
#
# Parameters
# ----------
# ylabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'bottom', 'center', 'top'}, default: :rc:`yaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *y* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.xlabel</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['Category'] | <b>Kwargs:</b> {'fontsize': 12}</li></ul>
# <blockquote>
# <code>
# Set the label for the x-axis.
#
# Parameters
# ----------
# xlabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'left', 'center', 'right'}, default: :rc:`xaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *x* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.show</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Display all open figures.
#
# Parameters
# ----------
# block : bool, optional
#     Whether to wait for all figures to be closed before returning.
#
#     If `True` block and run the GUI main loop until all figure windows
#     are closed.
#
#     If `False` ensure that all figure windows are displayed and return
#     immediately.  In this case, you are responsible for ensuring
#     that the event loop is running to have responsive figures.
#
#     Defaults to True in non-interactive mode and to False in interactive
#     mode (see `.pyplot.isinteractive`).
#
# See Also
# --------
# ion : Enable interactive mode, which shows / updates the figure after
#       every plotting command, so that calling ``show()`` is not necessary.
# ioff : Disable interactive mode.
# savefig : Save the figure to an image file instead of showing it on screen.
#
# Notes
# -----
# **Saving figures to file and showing a window at the same time**
#
# If you want an image file as well as a user interface window, use
# `.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)
# ``show()`` the figure is closed and thus unregistered from pyplot. Calling
# `.pyplot.savefig` afterwards would save a new and thus empty figure. This
# limitation of command order does not apply if the show is non-blocking or
# if you keep a reference to the figure and use `.Figure.savefig`.
#
# **Auto-show in jupyter notebooks**
#
# The jupyter backends (activated via ``%matplotlib inline``,
# ``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at
# the end of every cell by default. Thus, you usually don't have to call it
# explicitly there.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>seaborn</b>
# <ul>
# <li>
# <details><summary><u>seaborn.categorical.barplot</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'alpha': 0.8}</li></ul>
# <blockquote>
# <code>
# Show point estimates and confidence intervals as rectangular bars.
#
# A bar plot represents an estimate of central tendency for a numeric
# variable with the height of each rectangle and provides some indication of
# the uncertainty around that estimate using error bars. Bar plots include 0
# in the quantitative axis range, and they are a good choice when 0 is a
# meaningful value for the quantitative variable, and you want to make
# comparisons against it.
#
# For datasets where 0 is not a meaningful value, a point plot will allow you
# to focus on differences between levels of one or more categorical
# variables.
#
# It is also important to keep in mind that a bar plot shows only the mean
# (or other estimator) value, but in many cases it may be more informative to
# show the distribution of values at each level of the categorical variables.
# In that case, other approaches such as a box or violin plot may be more
# appropriate.
#
#
# Input data can be passed in a variety of formats, including:
#
# - Vectors of data represented as lists, numpy arrays, or pandas Series
#   objects passed directly to the ``x``, ``y``, and/or ``hue`` parameters.
# - A "long-form" DataFrame, in which case the ``x``, ``y``, and ``hue``
#   variables will determine how the data are plotted.
# - A "wide-form" DataFrame, such that each numeric column will be plotted.
# - An array or list of vectors.
#
# In most cases, it is possible to use numpy or Python objects, but pandas
# objects are preferable because the associated names will be used to
# annotate the axes. Additionally, you can use Categorical types for the
# grouping variables to control the order of plot elements.    
#
# This function always treats one of the variables as categorical and
# draws data at ordinal positions (0, 1, ... n) on the relevant axis, even
# when the data has a numeric or date type.
#
# See the :ref:`tutorial <categorical_tutorial>` for more information.    
#
# Parameters
# ----------
# x, y, hue : names of variables in ``data`` or vector data, optional
#     Inputs for plotting long-form data. See examples for interpretation.        
# data : DataFrame, array, or list of arrays, optional
#     Dataset for plotting. If ``x`` and ``y`` are absent, this is
#     interpreted as wide-form. Otherwise it is expected to be long-form.    
# order, hue_order : lists of strings, optional
#     Order to plot the categorical levels in, otherwise the levels are
#     inferred from the data objects.        
# estimator : callable that maps vector -> scalar, optional
#     Statistical function to estimate within each categorical bin.
# ci : float or "sd" or None, optional
#     Size of confidence intervals to draw around estimated values.  If
#     "sd", skip bootstrapping and draw the standard deviation of the
#     observations. If ``None``, no bootstrapping will be performed, and
#     error bars will not be drawn.
# n_boot : int, optional
#     Number of bootstrap iterations to use when computing confidence
#     intervals.
# units : name of variable in ``data`` or vector data, optional
#     Identifier of sampling units, which will be used to perform a
#     multilevel bootstrap and account for repeated measures design.
# seed : int, numpy.random.Generator, or numpy.random.RandomState, optional
#     Seed or random number generator for reproducible bootstrapping.    
# orient : "v" | "h", optional
#     Orientation of the plot (vertical or horizontal). This is usually
#     inferred based on the type of the input variables, but it can be used
#     to resolve ambiguity when both `x` and `y` are numeric or when
#     plotting wide-form data.    
# color : matplotlib color, optional
#     Color for all of the elements, or seed for a gradient palette.    
# palette : palette name, list, or dict
#     Colors to use for the different levels of the ``hue`` variable. Should
#     be something that can be interpreted by :func:`color_palette`, or a
#     dictionary mapping hue levels to matplotlib colors.    
# saturation : float, optional
#     Proportion of the original saturation to draw colors at. Large patches
#     often look better with slightly desaturated colors, but set this to
#     ``1`` if you want the plot colors to perfectly match the input color
#     spec.    
# errcolor : matplotlib color
#     Color for the lines that represent the confidence interval.
# errwidth : float, optional
#     Thickness of error bar lines (and caps).         
# capsize : float, optional
#     Width of the "caps" on error bars.
#
# dodge : bool, optional
#     When hue nesting is used, whether elements should be shifted along the
#     categorical axis.    
# ax : matplotlib Axes, optional
#     Axes object to draw the plot onto, otherwise uses the current Axes.    
# kwargs : key, value mappings
#     Other keyword arguments are passed through to
#     :meth:`matplotlib.axes.Axes.bar`.
#
# Returns
# -------
# ax : matplotlib Axes
#     Returns the Axes object with the plot drawn onto it.    
#
# See Also
# --------
# countplot : Show the counts of observations in each categorical bin.    
# pointplot : Show point estimates and confidence intervals using scatterplot
#             glyphs.    
# catplot : Combine a categorical plot with a :class:`FacetGrid`.    
#
# Examples
# --------
#
# Draw a set of vertical bar plots grouped by a categorical variable:
#
# .. plot::
#     :context: close-figs
#
#     >>> import seaborn as sns
#     >>> sns.set_theme(style="whitegrid")
#     >>> tips = sns.load_dataset("tips")
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips)
#
# Draw a set of vertical bars with nested grouping by two variables:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips)
#
# Draw a set of horizontal bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="tip", y="day", data=tips)
#
# Control bar order by passing an explicit order:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="time", y="tip", data=tips,
#     ...                  order=["Dinner", "Lunch"])
#
# Use median as the estimate of central tendency:
#
# .. plot::
#     :context: close-figs
#
#     >>> from numpy import median
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, estimator=median)
#
# Show the standard error of the mean with the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci=68)
#
# Show standard deviation of observations instead of a confidence interval:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci="sd")
#
# Add "caps" to the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, capsize=.2)
#
# Use a different color palette for the bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  palette="Blues_d")
#
# Use ``hue`` without changing bar position or width:
#
# .. plot::
#     :context: close-figs
#
#     >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="weekend",
#     ...                  data=tips, dodge=False)
#
# Plot all bars in a single color:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  color="salmon", saturation=.5)
#
# Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style.
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips,
#     ...                  linewidth=2.5, facecolor=(1, 1, 1, 0),
#     ...                  errcolor=".2", edgecolor=".2")
#
# Use :func:`catplot` to combine a :func:`barplot` and a :class:`FacetGrid`.
# This allows grouping within additional categorical variables. Using
# :func:`catplot` is safer than using :class:`FacetGrid` directly, as it
# ensures synchronization of variable order across facets:
#
# .. plot::
#     :context: close-figs
#
#     >>> g = sns.catplot(x="sex", y="total_bill",
#     ...                 hue="smoker", col="time",
#     ...                 data=tips, kind="bar",
#     ...                 height=4, aspect=.7);
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 8</u></h3></summary><small><a href=#8>goto cell # 8</a></small>
# <ul>
#
# <li> <b>plotly</b>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'title': ' Sales'}</li></ul>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner of
#                             the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 10</u></h3></summary><small><a href=#10>goto cell # 10</a></small>
# <ul>
#
# <li> <b>plotly</b>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'title': 'Montly Sales Diff'}</li></ul>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner of
#                             the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Data Preparation</span></h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Data Preparation" Calls</u></b></summary>
# <ul>
#
# <li> <b>sklearn</b>
# <ul>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Transform features by scaling each feature to a given range.
#
# This estimator scales and translates each feature individually such
# that it is in the given range on the training set, e.g. between
# zero and one.
#
# The transformation is given by::
#
#     X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
#     X_scaled = X_std * (max - min) + min
#
# where min, max = feature_range.
#
# This transformation is often used as an alternative to zero mean,
# unit variance scaling.
#
# Read more in the :ref:`User Guide <preprocessing_scaler>`.
#
# Parameters
# ----------
# feature_range : tuple (min, max), default=(0, 1)
#     Desired range of transformed data.
#
# copy : bool, default=True
#     Set to False to perform inplace row normalization and avoid a
#     copy (if the input is already a numpy array).
#
# clip : bool, default=False
#     Set to True to clip transformed values of held-out data to
#     provided `feature_range`.
#
#     .. versionadded:: 0.24
#
# Attributes
# ----------
# min_ : ndarray of shape (n_features,)
#     Per feature adjustment for minimum. Equivalent to
#     ``min - X.min(axis=0) * self.scale_``
#
# scale_ : ndarray of shape (n_features,)
#     Per feature relative scaling of the data. Equivalent to
#     ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
#
#     .. versionadded:: 0.17
#        *scale_* attribute.
#
# data_min_ : ndarray of shape (n_features,)
#     Per feature minimum seen in the data
#
#     .. versionadded:: 0.17
#        *data_min_*
#
# data_max_ : ndarray of shape (n_features,)
#     Per feature maximum seen in the data
#
#     .. versionadded:: 0.17
#        *data_max_*
#
# data_range_ : ndarray of shape (n_features,)
#     Per feature range ``(data_max_ - data_min_)`` seen in the data
#
#     .. versionadded:: 0.17
#        *data_range_*
#
# n_features_in_ : int
#     Number of features seen during :term:`fit`.
#
#     .. versionadded:: 0.24
#
# n_samples_seen_ : int
#     The number of samples processed by the estimator.
#     It will be reset on new calls to fit, but increments across
#     ``partial_fit`` calls.
#
# feature_names_in_ : ndarray of shape (`n_features_in_`,)
#     Names of features seen during :term:`fit`. Defined only when `X`
#     has feature names that are all strings.
#
#     .. versionadded:: 1.0
#
# See Also
# --------
# minmax_scale : Equivalent function without the estimator API.
#
# Notes
# -----
# NaNs are treated as missing values: disregarded in fit, and maintained in
# transform.
#
# For a comparison of the different scalers, transformers, and normalizers,
# see :ref:`examples/preprocessing/plot_all_scaling.py
# <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
#
# Examples
# --------
# >>> from sklearn.preprocessing import MinMaxScaler
# >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
# >>> scaler = MinMaxScaler()
# >>> print(scaler.fit(data))
# MinMaxScaler()
# >>> print(scaler.data_max_)
# [ 1. 18.]
# >>> print(scaler.transform(data))
# [[0.   0.  ]
#  [0.25 0.25]
#  [0.5  0.5 ]
#  [1.   1.  ]]
# >>> print(scaler.transform([[2, 2]]))
# [[1.5 0. ]]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler.fit</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Compute the minimum and maximum to be used for later scaling.
#
# Parameters
# ----------
# X : array-like of shape (n_samples, n_features)
#     The data used to compute the per-feature minimum and maximum
#     used for later scaling along the features axis.
#
# y : None
#     Ignored.
#
# Returns
# -------
# self : object
#     Fitted scaler.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.array</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 17</u></h3></summary><small><a href=#17>goto cell # 17</a></small>
# <ul>
#
# <li> <b>sklearn</b>
# <ul>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Transform features by scaling each feature to a given range.
#
# This estimator scales and translates each feature individually such
# that it is in the given range on the training set, e.g. between
# zero and one.
#
# The transformation is given by::
#
#     X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
#     X_scaled = X_std * (max - min) + min
#
# where min, max = feature_range.
#
# This transformation is often used as an alternative to zero mean,
# unit variance scaling.
#
# Read more in the :ref:`User Guide <preprocessing_scaler>`.
#
# Parameters
# ----------
# feature_range : tuple (min, max), default=(0, 1)
#     Desired range of transformed data.
#
# copy : bool, default=True
#     Set to False to perform inplace row normalization and avoid a
#     copy (if the input is already a numpy array).
#
# clip : bool, default=False
#     Set to True to clip transformed values of held-out data to
#     provided `feature_range`.
#
#     .. versionadded:: 0.24
#
# Attributes
# ----------
# min_ : ndarray of shape (n_features,)
#     Per feature adjustment for minimum. Equivalent to
#     ``min - X.min(axis=0) * self.scale_``
#
# scale_ : ndarray of shape (n_features,)
#     Per feature relative scaling of the data. Equivalent to
#     ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
#
#     .. versionadded:: 0.17
#        *scale_* attribute.
#
# data_min_ : ndarray of shape (n_features,)
#     Per feature minimum seen in the data
#
#     .. versionadded:: 0.17
#        *data_min_*
#
# data_max_ : ndarray of shape (n_features,)
#     Per feature maximum seen in the data
#
#     .. versionadded:: 0.17
#        *data_max_*
#
# data_range_ : ndarray of shape (n_features,)
#     Per feature range ``(data_max_ - data_min_)`` seen in the data
#
#     .. versionadded:: 0.17
#        *data_range_*
#
# n_features_in_ : int
#     Number of features seen during :term:`fit`.
#
#     .. versionadded:: 0.24
#
# n_samples_seen_ : int
#     The number of samples processed by the estimator.
#     It will be reset on new calls to fit, but increments across
#     ``partial_fit`` calls.
#
# feature_names_in_ : ndarray of shape (`n_features_in_`,)
#     Names of features seen during :term:`fit`. Defined only when `X`
#     has feature names that are all strings.
#
#     .. versionadded:: 1.0
#
# See Also
# --------
# minmax_scale : Equivalent function without the estimator API.
#
# Notes
# -----
# NaNs are treated as missing values: disregarded in fit, and maintained in
# transform.
#
# For a comparison of the different scalers, transformers, and normalizers,
# see :ref:`examples/preprocessing/plot_all_scaling.py
# <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
#
# Examples
# --------
# >>> from sklearn.preprocessing import MinMaxScaler
# >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
# >>> scaler = MinMaxScaler()
# >>> print(scaler.fit(data))
# MinMaxScaler()
# >>> print(scaler.data_max_)
# [ 1. 18.]
# >>> print(scaler.transform(data))
# [[0.   0.  ]
#  [0.25 0.25]
#  [0.5  0.5 ]
#  [1.   1.  ]]
# >>> print(scaler.transform([[2, 2]]))
# [[1.5 0. ]]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler.fit</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Compute the minimum and maximum to be used for later scaling.
#
# Parameters
# ----------
# X : array-like of shape (n_samples, n_features)
#     The data used to compute the per-feature minimum and maximum
#     used for later scaling along the features axis.
#
# y : None
#     Ignored.
#
# Returns
# -------
# self : object
#     Fitted scaler.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 21</u></h3></summary><small><a href=#21>goto cell # 21</a></small>
# <ul>
#
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.array</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Data Profiling and Exploratory Data Analysis</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Data Profiling and Exploratory Data Analysis" Calls</u></b></summary>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.head</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Return the first `n` rows.
#
# This function returns the first `n` rows for the object based
# on position. It is useful for quickly testing if your object
# has the right type of data in it.
#
# For negative values of `n`, this function returns all rows except
# the last `n` rows, equivalent to ``df[:-n]``.
#
# Parameters
# ----------
# n : int, default 5
#     Number of rows to select.
#
# Returns
# -------
# same type as caller
#     The first `n` rows of the caller object.
#
# See Also
# --------
# DataFrame.tail: Returns the last `n` rows.
#
# Examples
# --------
# >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
# ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
# >>> df
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
# 6      shark
# 7      whale
# 8      zebra
#
# Viewing the first 5 lines
#
# >>> df.head()
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
#
# Viewing the first `n` lines (three in this case)
#
# >>> df.head(3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
#
# For negative values of `n`
#
# >>> df.head(-3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 6</u></h3></summary><small><a href='#6'>goto cell # 6</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.head</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Return the first `n` rows.
#
# This function returns the first `n` rows for the object based
# on position. It is useful for quickly testing if your object
# has the right type of data in it.
#
# For negative values of `n`, this function returns all rows except
# the last `n` rows, equivalent to ``df[:-n]``.
#
# Parameters
# ----------
# n : int, default 5
#     Number of rows to select.
#
# Returns
# -------
# same type as caller
#     The first `n` rows of the caller object.
#
# See Also
# --------
# DataFrame.tail: Returns the last `n` rows.
#
# Examples
# --------
# >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
# ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
# >>> df
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
# 6      shark
# 7      whale
# 8      zebra
#
# Viewing the first 5 lines
#
# >>> df.head()
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
#
# Viewing the first `n` lines (three in this case)
#
# >>> df.head(3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
#
# For negative values of `n`
#
# >>> df.head(-3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Data Cleaning Filtering</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Data Cleaning Filtering" Calls</u></b></summary>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['date'] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 4</u></h3></summary><small><a href='#4'>goto cell # 4</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [['date_block_num', 'shop_id', 'item_id']] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 5</u></h3></summary><small><a href='#5'>goto cell # 5</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [['item_category_id']] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 7</u></h3></summary><small><a href='#7'>goto cell # 7</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['date'] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Data Sub-sampling and Train-test Splitting</s> (no calls found)</h4></summary>
# <ul>
#
# <li>None</li>
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Feature Engineering</span></h2></summary>
# <ul>
#
# <li>None</li>
#
# </ul>
# </details></li>
# <ul><li><details><summary><h2>Feature Transformation</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Feature Transformation" Calls</u></b></summary>
# <ul>
#
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 21</u></h3></summary><small><a href='#21'>goto cell # 21</a></small>
# <ul>
#
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Feature Selection</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Feature Selection" Calls</u></b></summary>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['date'] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 4</u></h3></summary><small><a href=#4>goto cell # 4</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [['date_block_num', 'shop_id', 'item_id']] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 5</u></h3></summary><small><a href=#5>goto cell # 5</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [['item_category_id']] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 7</u></h3></summary><small><a href=#7>goto cell # 7</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['date'] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Model Building and Training</span></h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Model Building and Training" Calls</u></b></summary>
# <ul>
#
# <li> <b>statsmodels</b>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'formula': 'diff ~ lag_1+lag_2+lag_3+lag_4+lag_5+lag_6+lag_7+lag_8+lag_9+lag_10+lag_11+lag_12'}</li></ul>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional arguments that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 13</u></h3></summary><small><a href=#13>goto cell # 13</a></small>
# <ul>
#
# <li> <b>statsmodels</b>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'formula': 'diff ~ lag_1'}</li></ul>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional arguments that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 14</u></h3></summary><small><a href=#14>goto cell # 14</a></small>
# <ul>
#
# <li> <b>statsmodels</b>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'formula': 'diff ~ lag_1+lag_2+lag_3+lag_4+lag_5+lag_6+lag_7+lag_8+lag_9+lag_10+lag_11+lag_12'}</li></ul>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional arguments that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li>
# <ul><li><details><summary><h2>Model Training</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Model Training" Calls</u></b></summary>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# # Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# # Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# # This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# # Note that you can also omit the `input_shape` argument.
# # In that case the model doesn't have any weights until the first call
# # to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# # model.weights not created yet
#
# # Whereas if you specify the input shape, the model gets built
# # continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# # Returns "4"
#
# # When using the delayed-build pattern (no input shape specified), you can
# # choose to manually build your model by calling
# # `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# # Returns "4"
#
# # Note that when using the delayed-build pattern (no input shape specified),
# # the model gets built the first time you call `fit`, `eval`, or `predict`,
# # or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# # This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential.add</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Adds a layer instance on top of the layer stack.
#
# Args:
#     layer: layer instance.
#
# Raises:
#     TypeError: If `layer` is not a layer instance.
#     ValueError: In case the `layer` argument does not
#         know its input shape.
#     ValueError: In case the `layer` argument has
#         multiple output tensors, or is already connected
#         somewhere else (forbidden in `Sequential` models).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.recurrent_v2.LSTM</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [4] | <b>Kwargs:</b> {'stateful': True}</li></ul>
# <blockquote>
# <code>
# Long Short-Term Memory layer - Hochreiter 1997.
#
# See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
# for details about the usage of RNN API.
#
# Based on available runtime hardware and constraints, this layer
# will choose different implementations (cuDNN-based or pure-TensorFlow)
# to maximize the performance. If a GPU is available and all
# the arguments to the layer meet the requirement of the cuDNN kernel
# (see below for details), the layer will use a fast cuDNN implementation.
#
# The requirements to use the cuDNN implementation are:
#
# 1. `activation` == `tanh`
# 2. `recurrent_activation` == `sigmoid`
# 3. `recurrent_dropout` == 0
# 4. `unroll` is `False`
# 5. `use_bias` is `True`
# 6. Inputs, if masking is used, are strictly right-padded.
# 7. Eager execution is enabled in the outermost context.
#
# For example:
#
# >>> inputs = tf.random.normal([32, 10, 8])
# >>> lstm = tf.keras.layers.LSTM(4)
# >>> output = lstm(inputs)
# >>> print(output.shape)
# (32, 4)
# >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
# >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
# >>> print(whole_seq_output.shape)
# (32, 10, 4)
# >>> print(final_memory_state.shape)
# (32, 4)
# >>> print(final_carry_state.shape)
# (32, 4)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
#     is applied (i.e. "linear" activation: `a(x) = x`).
#   recurrent_activation: Activation function to use for the recurrent step.
#     Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
#     applied (i.e. "linear" activation: `a(x) = x`).
#   use_bias: Boolean (default `True`), whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix, used for
#     the linear transformation of the inputs. Default: `glorot_uniform`.
#   recurrent_initializer: Initializer for the `recurrent_kernel` weights
#     matrix, used for the linear transformation of the recurrent state.
#     Default: `orthogonal`.
#   bias_initializer: Initializer for the bias vector. Default: `zeros`.
#   unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
#     the forget gate at initialization. Setting it to true will also force
#     `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
#         al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
#   kernel_regularizer: Regularizer function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_regularizer: Regularizer function applied to the
#     `recurrent_kernel` weights matrix. Default: `None`.
#   bias_regularizer: Regularizer function applied to the bias vector. Default:
#     `None`.
#   activity_regularizer: Regularizer function applied to the output of the
#     layer (its "activation"). Default: `None`.
#   kernel_constraint: Constraint function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_constraint: Constraint function applied to the `recurrent_kernel`
#     weights matrix. Default: `None`.
#   bias_constraint: Constraint function applied to the bias vector. Default:
#     `None`.
#   dropout: Float between 0 and 1. Fraction of the units to drop for the linear
#     transformation of the inputs. Default: 0.
#   recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
#     the linear transformation of the recurrent state. Default: 0.
#   return_sequences: Boolean. Whether to return the last output in the output
#     sequence, or the full sequence. Default: `False`.
#   return_state: Boolean. Whether to return the last state in addition to the
#     output. Default: `False`.
#   go_backwards: Boolean (default `False`). If True, process the input sequence
#     backwards and return the reversed sequence.
#   stateful: Boolean (default `False`). If True, the last state for each sample
#     at index i in a batch will be used as initial state for the sample of
#     index i in the following batch.
#   time_major: The shape format of the `inputs` and `outputs` tensors.
#     If True, the inputs and outputs will be in shape
#     `[timesteps, batch, feature]`, whereas in the False case, it will be
#     `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
#     efficient because it avoids transposes at the beginning and end of the
#     RNN calculation. However, most TensorFlow data is batch-major, so by
#     default this function accepts input and emits output in batch-major
#     form.
#   unroll: Boolean (default `False`). If True, the network will be unrolled,
#     else a symbolic loop will be used. Unrolling can speed-up a RNN, although
#     it tends to be more memory-intensive. Unrolling is only suitable for short
#     sequences.
#
# Call arguments:
#   inputs: A 3D tensor with shape `[batch, timesteps, feature]`.
#   mask: Binary tensor of shape `[batch, timesteps]` indicating whether
#     a given timestep should be masked (optional, defaults to `None`).
#     An individual `True` entry indicates that the corresponding timestep
#     should be utilized, while a `False` entry indicates that the corresponding
#     timestep should be ignored.
#   training: Python boolean indicating whether the layer should behave in
#     training mode or in inference mode. This argument is passed to the cell
#     when calling it. This is only relevant if `dropout` or
#     `recurrent_dropout` is used (optional, defaults to `None`).
#   initial_state: List of initial state tensors to be passed to the first
#     call of the cell (optional, defaults to `None` which causes creation
#     of zero-filled initial state tensors).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'loss': 'mean_squared_error', 'optimizer': 'adam'}</li></ul>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True` may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'nb_epoch': 50, 'batch_size': 1, 'verbose': 1, 'shuffle': False}</li></ul>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#        `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 19</u></h3></summary><small><a href=#19>goto cell # 19</a></small>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# # Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# # Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# # This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# # Note that you can also omit the `input_shape` argument.
# # In that case the model doesn't have any weights until the first call
# # to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# # model.weights not created yet
#
# # Whereas if you specify the input shape, the model gets built
# # continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# # Returns "4"
#
# # When using the delayed-build pattern (no input shape specified), you can
# # choose to manually build your model by calling
# # `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# # Returns "4"
#
# # Note that when using the delayed-build pattern (no input shape specified),
# # the model gets built the first time you call `fit`, `eval`, or `predict`,
# # or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# # This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential.add</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Adds a layer instance on top of the layer stack.
#
# Args:
#     layer: layer instance.
#
# Raises:
#     TypeError: If `layer` is not a layer instance.
#     ValueError: In case the `layer` argument does not
#         know its input shape.
#     ValueError: In case the `layer` argument has
#         multiple output tensors, or is already connected
#         somewhere else (forbidden in `Sequential` models).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.recurrent_v2.LSTM</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [4] | <b>Kwargs:</b> {'stateful': True}</li></ul>
# <blockquote>
# <code>
# Long Short-Term Memory layer - Hochreiter 1997.
#
# See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
# for details about the usage of RNN API.
#
# Based on available runtime hardware and constraints, this layer
# will choose different implementations (cuDNN-based or pure-TensorFlow)
# to maximize the performance. If a GPU is available and all
# the arguments to the layer meet the requirement of the cuDNN kernel
# (see below for details), the layer will use a fast cuDNN implementation.
#
# The requirements to use the cuDNN implementation are:
#
# 1. `activation` == `tanh`
# 2. `recurrent_activation` == `sigmoid`
# 3. `recurrent_dropout` == 0
# 4. `unroll` is `False`
# 5. `use_bias` is `True`
# 6. Inputs, if masking is used, are strictly right-padded.
# 7. Eager execution is enabled in the outermost context.
#
# For example:
#
# >>> inputs = tf.random.normal([32, 10, 8])
# >>> lstm = tf.keras.layers.LSTM(4)
# >>> output = lstm(inputs)
# >>> print(output.shape)
# (32, 4)
# >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
# >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
# >>> print(whole_seq_output.shape)
# (32, 10, 4)
# >>> print(final_memory_state.shape)
# (32, 4)
# >>> print(final_carry_state.shape)
# (32, 4)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
#     is applied (ie. "linear" activation: `a(x) = x`).
#   recurrent_activation: Activation function to use for the recurrent step.
#     Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
#     applied (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean (default `True`), whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix, used for
#     the linear transformation of the inputs. Default: `glorot_uniform`.
#   recurrent_initializer: Initializer for the `recurrent_kernel` weights
#     matrix, used for the linear transformation of the recurrent state.
#     Default: `orthogonal`.
#   bias_initializer: Initializer for the bias vector. Default: `zeros`.
#   unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
#     the forget gate at initialization. Setting it to true will also force
#     `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
#         al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
#   kernel_regularizer: Regularizer function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_regularizer: Regularizer function applied to the
#     `recurrent_kernel` weights matrix. Default: `None`.
#   bias_regularizer: Regularizer function applied to the bias vector. Default:
#     `None`.
#   activity_regularizer: Regularizer function applied to the output of the
#     layer (its "activation"). Default: `None`.
#   kernel_constraint: Constraint function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_constraint: Constraint function applied to the `recurrent_kernel`
#     weights matrix. Default: `None`.
#   bias_constraint: Constraint function applied to the bias vector. Default:
#     `None`.
#   dropout: Float between 0 and 1. Fraction of the units to drop for the linear
#     transformation of the inputs. Default: 0.
#   recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
#     the linear transformation of the recurrent state. Default: 0.
#   return_sequences: Boolean. Whether to return the last output in the output
#     sequence, or the full sequence. Default: `False`.
#   return_state: Boolean. Whether to return the last state in addition to the
#     output. Default: `False`.
#   go_backwards: Boolean (default `False`). If True, process the input sequence
#     backwards and return the reversed sequence.
#   stateful: Boolean (default `False`). If True, the last state for each sample
#     at index i in a batch will be used as initial state for the sample of
#     index i in the following batch.
#   time_major: The shape format of the `inputs` and `outputs` tensors.
#     If True, the inputs and outputs will be in shape
#     `[timesteps, batch, feature]`, whereas in the False case, it will be
#     `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
#     efficient because it avoids transposes at the beginning and end of the
#     RNN calculation. However, most TensorFlow data is batch-major, so by
#     default this function accepts input and emits output in batch-major
#     form.
#   unroll: Boolean (default `False`). If True, the network will be unrolled,
#     else a symbolic loop will be used. Unrolling can speed up an RNN, although
#     it tends to be more memory-intensive. Unrolling is only suitable for short
#     sequences.
#
# Call arguments:
#   inputs: A 3D tensor with shape `[batch, timesteps, feature]`.
#   mask: Binary tensor of shape `[batch, timesteps]` indicating whether
#     a given timestep should be masked (optional, defaults to `None`).
#     An individual `True` entry indicates that the corresponding timestep
#     should be utilized, while a `False` entry indicates that the corresponding
#     timestep should be ignored.
#   training: Python boolean indicating whether the layer should behave in
#     training mode or in inference mode. This argument is passed to the cell
#     when calling it. This is only relevant if `dropout` or
#     `recurrent_dropout` is used (optional, defaults to `None`).
#   initial_state: List of initial state tensors to be passed to the first
#     call of the cell (optional, defaults to `None` which causes creation
#     of zero-filled initial state tensors).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'loss': 'mean_squared_error', 'optimizer': 'adam'}</li></ul>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'nb_epoch': 50, 'batch_size': 1, 'verbose': 1, 'shuffle': False}</li></ul>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#         `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance, instead provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Model Parameter Tuning</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Model Validation and Assembling</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Model Validation and Assembling" Calls</u></b></summary>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'batch_size': 1}</li></ul>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 20</u></h3></summary><small><a href=#20>goto cell # 20</a></small>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'batch_size': 1}</li></ul>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# </ul>
# <hr>
#
# <details><summary><h2>View All ML API Calls in Notebook</h2></summary>
# <ul>
#
# <li> <b>datetime</b>
# <ul>
# <li>
# <details><summary><u>datetime.datetime</u></summary>
# <blockquote>
# <code>
# datetime(year, month, day[, hour[, minute[, second[, microsecond[,tzinfo]]]]])
#
# The year, month and day arguments are required. tzinfo may be None, or an
# instance of a tzinfo subclass. The remaining arguments may be ints.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras</u></summary>
# <blockquote>
# <code>
# Implementation of the Keras API, the high-level API of TensorFlow.
#
# Detailed documentation and user guides are available at
# [keras.io](https://keras.io).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.callbacks.EarlyStopping</u></summary>
# <blockquote>
# <code>
# Stop training when a monitored metric has stopped improving.
#
# Assuming the goal of training is to minimize the loss, the
# metric to be monitored would be `'loss'`, and mode would be `'min'`. A
# `model.fit()` training loop will check at end of every epoch whether
# the loss is no longer decreasing, considering the `min_delta` and
# `patience` if applicable. Once it's found no longer decreasing,
# `model.stop_training` is marked True and the training terminates.
#
# The quantity to be monitored needs to be available in `logs` dict.
# To make it so, pass the loss or metrics at `model.compile()`.
#
# Args:
#   monitor: Quantity to be monitored.
#   min_delta: Minimum change in the monitored quantity
#       to qualify as an improvement, i.e. an absolute
#       change of less than min_delta, will count as no
#       improvement.
#   patience: Number of epochs with no improvement
#       after which training will be stopped.
#   verbose: verbosity mode.
#   mode: One of `{"auto", "min", "max"}`. In `"min"` mode,
#       training will stop when the quantity
#       monitored has stopped decreasing; in `"max"`
#       mode it will stop when the quantity
#       monitored has stopped increasing; in `"auto"`
#       mode, the direction is automatically inferred
#       from the name of the monitored quantity.
#   baseline: Baseline value for the monitored quantity.
#       Training will stop if the model doesn't show improvement over the
#       baseline.
#   restore_best_weights: Whether to restore model weights from
#       the epoch with the best value of the monitored quantity.
#       If False, the model weights obtained at the last step of
#       training are used. An epoch will be restored regardless
#       of the performance relative to the `baseline`. If no epoch
#       improves on `baseline`, training will run for `patience`
#       epochs and restore weights from the best epoch in that set.
#
# Example:
#
# >>> callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
# >>> # This callback will stop the training when there is no improvement in
# >>> # the loss for three consecutive epochs.
# >>> model = tf.keras.models.Sequential([tf.keras.layers.Dense(10)])
# >>> model.compile(tf.keras.optimizers.SGD(), loss='mse')
# >>> history = model.fit(np.arange(100).reshape(5, 20), np.zeros(5),
# ...                     epochs=10, batch_size=1, callbacks=[callback],
# ...                     verbose=0)
# >>> len(history.history['loss'])  # Only 4 epochs are run.
# 4
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u></summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# # Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# # Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# # This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# # Note that you can also omit the `input_shape` argument.
# # In that case the model doesn't have any weights until the first call
# # to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# # model.weights not created yet
#
# # Whereas if you specify the input shape, the model gets built
# # continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# # Returns "4"
#
# # When using the delayed-build pattern (no input shape specified), you can
# # choose to manually build your model by calling
# # `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# # Returns "4"
#
# # Note that when using the delayed-build pattern (no input shape specified),
# # the model gets built the first time you call `fit`, `eval`, or `predict`,
# # or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# # This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential.add</u></summary>
# <blockquote>
# <code>
# Adds a layer instance on top of the layer stack.
#
# Args:
#     layer: layer instance.
#
# Raises:
#     TypeError: If `layer` is not a layer instance.
#     ValueError: In case the `layer` argument does not
#         know its input shape.
#     ValueError: In case the `layer` argument has
#         multiple output tensors, or is already connected
#         somewhere else (forbidden in `Sequential` models).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u></summary>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u></summary>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (e.g., in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#         `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled, or
#     2. If `model.fit` is wrapped in a `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u></summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u></summary>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (i.e. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.recurrent_v2.LSTM</u></summary>
# <blockquote>
# <code>
# Long Short-Term Memory layer - Hochreiter 1997.
#
# See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
# for details about the usage of RNN API.
#
# Based on available runtime hardware and constraints, this layer
# will choose different implementations (cuDNN-based or pure-TensorFlow)
# to maximize the performance. If a GPU is available and all
# the arguments to the layer meet the requirement of the cuDNN kernel
# (see below for details), the layer will use a fast cuDNN implementation.
#
# The requirements to use the cuDNN implementation are:
#
# 1. `activation` == `tanh`
# 2. `recurrent_activation` == `sigmoid`
# 3. `recurrent_dropout` == 0
# 4. `unroll` is `False`
# 5. `use_bias` is `True`
# 6. Inputs, if masking is used, are strictly right-padded.
# 7. Eager execution is enabled in the outermost context.
#
# For example:
#
# >>> inputs = tf.random.normal([32, 10, 8])
# >>> lstm = tf.keras.layers.LSTM(4)
# >>> output = lstm(inputs)
# >>> print(output.shape)
# (32, 4)
# >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
# >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
# >>> print(whole_seq_output.shape)
# (32, 10, 4)
# >>> print(final_memory_state.shape)
# (32, 4)
# >>> print(final_carry_state.shape)
# (32, 4)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
#     is applied (i.e. "linear" activation: `a(x) = x`).
#   recurrent_activation: Activation function to use for the recurrent step.
#     Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
#     applied (i.e. "linear" activation: `a(x) = x`).
#   use_bias: Boolean (default `True`), whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix, used for
#     the linear transformation of the inputs. Default: `glorot_uniform`.
#   recurrent_initializer: Initializer for the `recurrent_kernel` weights
#     matrix, used for the linear transformation of the recurrent state.
#     Default: `orthogonal`.
#   bias_initializer: Initializer for the bias vector. Default: `zeros`.
#   unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
#     the forget gate at initialization. Setting it to true will also force
#     `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
#         al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
#   kernel_regularizer: Regularizer function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_regularizer: Regularizer function applied to the
#     `recurrent_kernel` weights matrix. Default: `None`.
#   bias_regularizer: Regularizer function applied to the bias vector. Default:
#     `None`.
#   activity_regularizer: Regularizer function applied to the output of the
#     layer (its "activation"). Default: `None`.
#   kernel_constraint: Constraint function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_constraint: Constraint function applied to the `recurrent_kernel`
#     weights matrix. Default: `None`.
#   bias_constraint: Constraint function applied to the bias vector. Default:
#     `None`.
#   dropout: Float between 0 and 1. Fraction of the units to drop for the linear
#     transformation of the inputs. Default: 0.
#   recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
#     the linear transformation of the recurrent state. Default: 0.
#   return_sequences: Boolean. Whether to return the last output in the output
#     sequence, or the full sequence. Default: `False`.
#   return_state: Boolean. Whether to return the last state in addition to the
#     output. Default: `False`.
#   go_backwards: Boolean (default `False`). If True, process the input sequence
#     backwards and return the reversed sequence.
#   stateful: Boolean (default `False`). If True, the last state for each sample
#     at index i in a batch will be used as initial state for the sample of
#     index i in the following batch.
#   time_major: The shape format of the `inputs` and `outputs` tensors.
#     If True, the inputs and outputs will be in shape
#     `[timesteps, batch, feature]`, whereas in the False case, it will be
#     `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
#     efficient because it avoids transposes at the beginning and end of the
#     RNN calculation. However, most TensorFlow data is batch-major, so by
#     default this function accepts input and emits output in batch-major
#     form.
#   unroll: Boolean (default `False`). If True, the network will be unrolled,
#     else a symbolic loop will be used. Unrolling can speed-up a RNN, although
#     it tends to be more memory-intensive. Unrolling is only suitable for short
#     sequences.
#
# Call arguments:
#   inputs: A 3D tensor with shape `[batch, timesteps, feature]`.
#   mask: Binary tensor of shape `[batch, timesteps]` indicating whether
#     a given timestep should be masked (optional, defaults to `None`).
#     An individual `True` entry indicates that the corresponding timestep
#     should be utilized, while a `False` entry indicates that the corresponding
#     timestep should be ignored.
#   training: Python boolean indicating whether the layer should behave in
#     training mode or in inference mode. This argument is passed to the cell
#     when calling it. This is only relevant if `dropout` or
#     `recurrent_dropout` is used (optional, defaults to `None`).
#   initial_state: List of initial state tensors to be passed to the first
#     call of the cell (optional, defaults to `None` which causes creation
#     of zero-filled initial state tensors).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.optimizer_v2.adam.Adam</u></summary>
# <blockquote>
# <code>
# Optimizer that implements the Adam algorithm.
#
# Adam optimization is a stochastic gradient descent method that is based on
# adaptive estimation of first-order and second-order moments.
#
# According to
# [Kingma et al., 2014](http://arxiv.org/abs/1412.6980),
# the method is "*computationally
# efficient, has little memory requirement, invariant to diagonal rescaling of
# gradients, and is well suited for problems that are large in terms of
# data/parameters*".
#
# Args:
#   learning_rate: A `Tensor`, floating point value, or a schedule that is a
#     `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
#     that takes no arguments and returns the actual value to use. The
#     learning rate. Defaults to 0.001.
#   beta_1: A float value or a constant float tensor, or a callable
#     that takes no arguments and returns the actual value to use. The
#     exponential decay rate for the 1st moment estimates. Defaults to 0.9.
#   beta_2: A float value or a constant float tensor, or a callable
#     that takes no arguments and returns the actual value to use. The
#     exponential decay rate for the 2nd moment estimates. Defaults to 0.999.
#   epsilon: A small constant for numerical stability. This epsilon is
#     "epsilon hat" in the Kingma and Ba paper (in the formula just before
#     Section 2.1), not the epsilon in Algorithm 1 of the paper. Defaults to
#     1e-7.
#   amsgrad: Boolean. Whether to apply AMSGrad variant of this algorithm from
#     the paper "On the Convergence of Adam and beyond". Defaults to `False`.
#   name: Optional name for the operations created when applying gradients.
#     Defaults to `"Adam"`.
#   **kwargs: Keyword arguments. Allowed to be one of
#     `"clipnorm"` or `"clipvalue"`.
#     `"clipnorm"` (float) clips gradients by norm; `"clipvalue"` (float) clips
#     gradients by value.
#
# Usage:
#
# >>> opt = tf.keras.optimizers.Adam(learning_rate=0.1)
# >>> var1 = tf.Variable(10.0)
# >>> loss = lambda: (var1 ** 2)/2.0       # d(loss)/d(var1) == var1
# >>> step_count = opt.minimize(loss, [var1]).numpy()
# >>> # The first step is `-learning_rate*sign(grad)`
# >>> var1.numpy()
# 9.9
#
# Reference:
#   - [Kingma et al., 2014](http://arxiv.org/abs/1412.6980)
#   - [Reddi et al., 2018](
#       https://openreview.net/pdf?id=ryQu7f-RZ) for `amsgrad`.
#
# Notes:
#
# The default value of 1e-7 for epsilon might not be a good default in
# general. For example, when training an Inception network on ImageNet a
# current good choice is 1.0 or 0.1. Note that since Adam uses the
# formulation just before Section 2.1 of the Kingma and Ba paper rather than
# the formulation in Algorithm 1, the "epsilon" referred to here is "epsilon
# hat" in the paper.
#
# The sparse implementation of this algorithm (used when the gradient is an
# IndexedSlices object, typically because of `tf.gather` or an embedding
# lookup in the forward pass) does apply momentum to variable slices even if
# they were not used in the forward pass (meaning they have a gradient equal
# to zero). Momentum decay (beta1) is also applied to the entire momentum
# accumulator. This means that the sparse behavior is equivalent to the dense
# behavior (in contrast to some momentum implementations which ignore momentum
# unless a variable slice was actually used).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.utils.np_utils</u></summary>
# <blockquote>
# <code>
# Numpy-related utilities.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>matplotlib</b>
# <ul>
# <li>
# <details><summary><u>matplotlib.pyplot</u></summary>
# <blockquote>
# <code>
# `matplotlib.pyplot` is a state-based interface to matplotlib. It provides
# an implicit,  MATLAB-like, way of plotting.  It also opens figures on your
# screen, and acts as the figure GUI manager.
#
# pyplot is mainly intended for interactive plots and simple cases of
# programmatic plot generation::
#
#     import numpy as np
#     import matplotlib.pyplot as plt
#
#     x = np.arange(0, 5, 0.1)
#     y = np.sin(x)
#     plt.plot(x, y)
#
# The explicit (object-oriented) API is recommended for complex plots, though
# pyplot is still usually used to create the figure and often the axes in the
# figure. See `.pyplot.figure`, `.pyplot.subplots`, and
# `.pyplot.subplot_mosaic` to create figures, and
# :doc:`Axes API <../axes_api>` for the plotting methods on an axes::
#
#     import numpy as np
#     import matplotlib.pyplot as plt
#
#     x = np.arange(0, 5, 0.1)
#     y = np.sin(x)
#     fig, ax = plt.subplots()
#     ax.plot(x, y)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.figure</u></summary>
# <blockquote>
# <code>
# Create a new figure, or activate an existing figure.
#
# Parameters
# ----------
# num : int or str or `.Figure`, optional
#     A unique identifier for the figure.
#
#     If a figure with that identifier already exists, this figure is made
#     active and returned. An integer refers to the ``Figure.number``
#     attribute, a string refers to the figure label.
#
#     If there is no figure with the identifier or *num* is not given, a new
#     figure is created, made active and returned.  If *num* is an int, it
#     will be used for the ``Figure.number`` attribute, otherwise, an
#     auto-generated integer value is used (starting at 1 and incremented
#     for each new figure). If *num* is a string, the figure label and the
#     window title is set to this value.
#
# figsize : (float, float), default: :rc:`figure.figsize`
#     Width, height in inches.
#
# dpi : float, default: :rc:`figure.dpi`
#     The resolution of the figure in dots-per-inch.
#
# facecolor : color, default: :rc:`figure.facecolor`
#     The background color.
#
# edgecolor : color, default: :rc:`figure.edgecolor`
#     The border color.
#
# frameon : bool, default: True
#     If False, suppress drawing the figure frame.
#
# FigureClass : subclass of `~matplotlib.figure.Figure`
#     Optionally use a custom `.Figure` instance.
#
# clear : bool, default: False
#     If True and the figure already exists, then it is cleared.
#
# tight_layout : bool or dict, default: :rc:`figure.autolayout`
#     If ``False`` use *subplotpars*. If ``True`` adjust subplot
#     parameters using `.tight_layout` with default padding.
#     When providing a dict containing the keys ``pad``, ``w_pad``,
#     ``h_pad``, and ``rect``, the default `.tight_layout` paddings
#     will be overridden.
#
# constrained_layout : bool, default: :rc:`figure.constrained_layout.use`
#     If ``True`` use constrained layout to adjust positioning of plot
#     elements.  Like ``tight_layout``, but designed to be more
#     flexible.  See
#     :doc:`/tutorials/intermediate/constrainedlayout_guide`
#     for examples.  (Note: does not work with `add_subplot` or
#     `~.pyplot.subplot2grid`.)
#
#
# **kwargs : optional
#     See `~.matplotlib.figure.Figure` for other possible arguments.
#
# Returns
# -------
# `~matplotlib.figure.Figure`
#     The `.Figure` instance returned will also be passed to
#     new_figure_manager in the backends, which allows hooking custom
#     `.Figure` classes into the pyplot interface. Additional kwargs will be
#     passed to the `.Figure` init function.
#
# Notes
# -----
# If you are creating many figures, make sure you explicitly call
# `.pyplot.close` on the figures you are not using, because this will
# enable pyplot to properly clean up the memory.
#
# `~matplotlib.rcParams` defines the default values, which can be modified
# in the matplotlibrc file.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.show</u></summary>
# <blockquote>
# <code>
# Display all open figures.
#
# Parameters
# ----------
# block : bool, optional
#     Whether to wait for all figures to be closed before returning.
#
#     If `True` block and run the GUI main loop until all figure windows
#     are closed.
#
#     If `False` ensure that all figure windows are displayed and return
#     immediately.  In this case, you are responsible for ensuring
#     that the event loop is running to have responsive figures.
#
#     Defaults to True in non-interactive mode and to False in interactive
#     mode (see `.pyplot.isinteractive`).
#
# See Also
# --------
# ion : Enable interactive mode, which shows / updates the figure after
#       every plotting command, so that calling ``show()`` is not necessary.
# ioff : Disable interactive mode.
# savefig : Save the figure to an image file instead of showing it on screen.
#
# Notes
# -----
# **Saving figures to file and showing a window at the same time**
#
# If you want an image file as well as a user interface window, use
# `.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)
# ``show()`` the figure is closed and thus unregistered from pyplot. Calling
# `.pyplot.savefig` afterwards would save a new and thus empty figure. This
# limitation of command order does not apply if the show is non-blocking or
# if you keep a reference to the figure and use `.Figure.savefig`.
#
# **Auto-show in jupyter notebooks**
#
# The jupyter backends (activated via ``%matplotlib inline``,
# ``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at
# the end of every cell by default. Thus, you usually don't have to call it
# explicitly there.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.title</u></summary>
# <blockquote>
# <code>
# Set a title for the Axes.
#
# Set one of the three available Axes titles. The available titles
# are positioned above the Axes in the center, flush with the left
# edge, and flush with the right edge.
#
# Parameters
# ----------
# label : str
#     Text to use for the title
#
# fontdict : dict
#     A dictionary controlling the appearance of the title text,
#     the default *fontdict* is::
#
#        {'fontsize': rcParams['axes.titlesize'],
#         'fontweight': rcParams['axes.titleweight'],
#         'color': rcParams['axes.titlecolor'],
#         'verticalalignment': 'baseline',
#         'horizontalalignment': loc}
#
# loc : {'center', 'left', 'right'}, default: :rc:`axes.titlelocation`
#     Which title to set.
#
# y : float, default: :rc:`axes.titley`
#     Vertical Axes location for the title (1.0 is the top).  If
#     None (the default) and :rc:`axes.titley` is also None, y is
#     determined automatically to avoid decorators on the Axes.
#
# pad : float, default: :rc:`axes.titlepad`
#     The offset of the title from the top of the Axes, in points.
#
# Returns
# -------
# `.Text`
#     The matplotlib text instance representing the title
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     Other keyword arguments are text properties, see `.Text` for a list
#     of valid text properties.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.xlabel</u></summary>
# <blockquote>
# <code>
# Set the label for the x-axis.
#
# Parameters
# ----------
# xlabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'left', 'center', 'right'}, default: :rc:`xaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *x* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.ylabel</u></summary>
# <blockquote>
# <code>
# Set the label for the y-axis.
#
# Parameters
# ----------
# ylabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'bottom', 'center', 'top'}, default: :rc:`yaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *y* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy</u></summary>
# <blockquote>
# <code>
# NumPy
# =====
#
# Provides
#   1. An array object of arbitrary homogeneous items
#   2. Fast mathematical operations over arrays
#   3. Linear Algebra, Fourier Transforms, Random Number Generation
#
# How to use the documentation
# ----------------------------
# Documentation is available in two forms: docstrings provided
# with the code, and a loose standing reference guide, available from
# `the NumPy homepage <https://numpy.org>`_.
#
# We recommend exploring the docstrings using
# `IPython <https://ipython.org>`_, an advanced Python shell with
# TAB-completion and introspection capabilities.  See below for further
# instructions.
#
# The docstring examples assume that `numpy` has been imported as `np`::
#
#   >>> import numpy as np
#
# Code snippets are indicated by three greater-than signs::
#
#   >>> x = 42
#   >>> x = x + 1
#
# Use the built-in ``help`` function to view a function's docstring::
#
#   >>> help(np.sort)
#   ... # doctest: +SKIP
#
# For some objects, ``np.info(obj)`` may provide additional help.  This is
# particularly true if you see the line "Help on ufunc object:" at the top
# of the help() page.  Ufuncs are implemented in C, not Python, for speed.
# The native Python help() does not know how to view their help, but our
# np.info() function does.
#
# To search for documents containing a keyword, do::
#
#   >>> np.lookfor('keyword')
#   ... # doctest: +SKIP
#
# General-purpose documents like a glossary and help on the basic concepts
# of numpy are available under the ``doc`` sub-module::
#
#   >>> from numpy import doc
#   >>> help(doc)
#   ... # doctest: +SKIP
#
# Available subpackages
# ---------------------
# doc
#     Topical documentation on broadcasting, indexing, etc.
# lib
#     Basic functions used by several sub-packages.
# random
#     Core Random Tools
# linalg
#     Core Linear Algebra Tools
# fft
#     Core FFT routines
# polynomial
#     Polynomial tools
# testing
#     NumPy testing tools
# f2py
#     Fortran to Python Interface Generator.
# distutils
#     Enhancements to distutils with support for
#     Fortran compilers and more.
#
# Utilities
# ---------
# test
#     Run numpy unittests
# show_config
#     Show numpy build configuration
# dual
#     Overwrite certain functions with high-performance SciPy tools.
#     Note: `numpy.dual` is deprecated.  Use the functions from NumPy or Scipy
#     directly instead of importing them from `numpy.dual`.
# matlib
#     Make everything matrices.
# __version__
#     NumPy version string
#
# Viewing documentation using IPython
# -----------------------------------
# Start IPython with the NumPy profile (``ipython -p numpy``), which will
# import `numpy` under the alias `np`.  Then, use the ``cpaste`` command to
# paste examples into the shell.  To see which functions are available in
# `numpy`, type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
# ``np.*cos*?<ENTER>`` (where ``<ENTER>`` refers to the ENTER key) to narrow
# down the list.  To view the docstring for a function, use
# ``np.cos?<ENTER>`` (to view the docstring) and ``np.cos??<ENTER>`` (to view
# the source code).
#
# Copies vs. in-place operation
# -----------------------------
# Most of the functions in `numpy` return a copy of the array argument
# (e.g., `np.sort`).  In-place versions of these functions are often
# available as array methods, i.e. ``x = np.array([1,2,3]); x.sort()``.
# Exceptions to this rule are documented.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.array</u></summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.core._multiarray_umath.concatenate</u></summary>
# <blockquote>
# <code>
# concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind")
#
# Join a sequence of arrays along an existing axis.
#
# Parameters
# ----------
# a1, a2, ... : sequence of array_like
#     The arrays must have the same shape, except in the dimension
#     corresponding to `axis` (the first, by default).
# axis : int, optional
#     The axis along which the arrays will be joined.  If axis is None,
#     arrays are flattened before use.  Default is 0.
# out : ndarray, optional
#     If provided, the destination to place the result. The shape must be
#     correct, matching that of what concatenate would have returned if no
#     out argument were specified.
# dtype : str or dtype
#     If provided, the destination array will have this dtype. Cannot be
#     provided together with `out`.
#
#     .. versionadded:: 1.20.0
#
# casting : {'no', 'equiv', 'safe', 'same_kind', 'unsafe'}, optional
#     Controls what kind of data casting may occur. Defaults to 'same_kind'.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# res : ndarray
#     The concatenated array.
#
# See Also
# --------
# ma.concatenate : Concatenate function that preserves input masks.
# array_split : Split an array into multiple sub-arrays of equal or
#               near-equal size.
# split : Split array into a list of multiple sub-arrays of equal size.
# hsplit : Split array into multiple sub-arrays horizontally (column wise).
# vsplit : Split array into multiple sub-arrays vertically (row wise).
# dsplit : Split array into multiple sub-arrays along the 3rd axis (depth).
# stack : Stack a sequence of arrays along a new axis.
# block : Assemble arrays from blocks.
# hstack : Stack arrays in sequence horizontally (column wise).
# vstack : Stack arrays in sequence vertically (row wise).
# dstack : Stack arrays in sequence depth wise (along third dimension).
# column_stack : Stack 1-D arrays as columns into a 2-D array.
#
# Notes
# -----
# When one or more of the arrays to be concatenated is a MaskedArray,
# this function will return a MaskedArray object instead of an ndarray,
# but the input masks are *not* preserved. In cases where a MaskedArray
# is expected as input, use the ma.concatenate function from the masked
# array module instead.
#
# Examples
# --------
# >>> a = np.array([[1, 2], [3, 4]])
# >>> b = np.array([[5, 6]])
# >>> np.concatenate((a, b), axis=0)
# array([[1, 2],
#        [3, 4],
#        [5, 6]])
# >>> np.concatenate((a, b.T), axis=1)
# array([[1, 2, 5],
#        [3, 4, 6]])
# >>> np.concatenate((a, b), axis=None)
# array([1, 2, 3, 4, 5, 6])
#
# This function will not preserve masking of MaskedArray inputs.
#
# >>> a = np.ma.arange(3)
# >>> a[1] = np.ma.masked
# >>> b = np.arange(2, 5)
# >>> a
# masked_array(data=[0, --, 2],
#              mask=[False,  True, False],
#        fill_value=999999)
# >>> b
# array([2, 3, 4])
# >>> np.concatenate([a, b])
# masked_array(data=[0, 1, 2, 2, 3, 4],
#              mask=False,
#        fill_value=999999)
# >>> np.ma.concatenate([a, b])
# masked_array(data=[0, --, 2, 2, 3, 4],
#              mask=[False,  True, False, False, False, False],
#        fill_value=999999)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.ndarray</u></summary>
# <blockquote>
# <code>
# ndarray(shape, dtype=float, buffer=None, offset=0,
#         strides=None, order=None)
#
# An array object represents a multidimensional, homogeneous array
# of fixed-size items.  An associated data-type object describes the
# format of each element in the array (its byte-order, how many bytes it
# occupies in memory, whether it is an integer, a floating point number,
# or something else, etc.)
#
# Arrays should be constructed using `array`, `zeros` or `empty` (refer
# to the See Also section below).  The parameters given here refer to
# a low-level method (`ndarray(...)`) for instantiating an array.
#
# For more information, refer to the `numpy` module and examine the
# methods and attributes of an array.
#
# Parameters
# ----------
# (for the __new__ method; see Notes below)
#
# shape : tuple of ints
#     Shape of created array.
# dtype : data-type, optional
#     Any object that can be interpreted as a numpy data type.
# buffer : object exposing buffer interface, optional
#     Used to fill the array with data.
# offset : int, optional
#     Offset of array data in buffer.
# strides : tuple of ints, optional
#     Strides of data in memory.
# order : {'C', 'F'}, optional
#     Row-major (C-style) or column-major (Fortran-style) order.
#
# Attributes
# ----------
# T : ndarray
#     Transpose of the array.
# data : buffer
#     The array's elements, in memory.
# dtype : dtype object
#     Describes the format of the elements in the array.
# flags : dict
#     Dictionary containing information related to memory use, e.g.,
#     'C_CONTIGUOUS', 'OWNDATA', 'WRITEABLE', etc.
# flat : numpy.flatiter object
#     Flattened version of the array as an iterator.  The iterator
#     allows assignments, e.g., ``x.flat = 3`` (see `ndarray.flat` for
#     assignment examples).
# imag : ndarray
#     Imaginary part of the array.
# real : ndarray
#     Real part of the array.
# size : int
#     Number of elements in the array.
# itemsize : int
#     The memory use of each array element in bytes.
# nbytes : int
#     The total number of bytes required to store the array data,
#     i.e., ``itemsize * size``.
# ndim : int
#     The array's number of dimensions.
# shape : tuple of ints
#     Shape of the array.
# strides : tuple of ints
#     The step-size required to move from one element to the next in
#     memory. For example, a contiguous ``(3, 4)`` array of type
#     ``int16`` in C-order has strides ``(8, 2)``.  This implies that
#     to move from element to element in memory requires jumps of 2 bytes.
#     To move from row-to-row, one needs to jump 8 bytes at a time
#     (``2 * 4``).
# ctypes : ctypes object
#     Class containing properties of the array needed for interaction
#     with ctypes.
# base : ndarray
#     If the array is a view into another array, that array is its `base`
#     (unless that array is also a view).  The `base` array is where the
#     array data is actually stored.
#
# See Also
# --------
# array : Construct an array.
# zeros : Create an array, each element of which is zero.
# empty : Create an array, but leave its allocated memory unchanged (i.e.,
#         it contains "garbage").
# dtype : Create a data-type.
# numpy.typing.NDArray : An ndarray alias :term:`generic <generic type>`
#                        w.r.t. its `dtype.type <numpy.dtype.type>`.
#
# Notes
# -----
# There are two modes of creating an array using ``__new__``:
#
# 1. If `buffer` is None, then only `shape`, `dtype`, and `order`
#    are used.
# 2. If `buffer` is an object exposing the buffer interface, then
#    all keywords are interpreted.
#
# No ``__init__`` method is needed because the array is fully initialized
# after the ``__new__`` method.
#
# Examples
# --------
# These examples illustrate the low-level `ndarray` constructor.  Refer
# to the `See Also` section above for easier ways of constructing an
# ndarray.
#
# First mode, `buffer` is None:
#
# >>> np.ndarray(shape=(2,2), dtype=float, order='F')
# array([[0.0e+000, 0.0e+000], # random
#        [     nan, 2.5e-323]])
#
# Second mode:
#
# >>> np.ndarray((2,), buffer=np.array([1,2,3]),
# ...            offset=np.int_().itemsize,
# ...            dtype=int) # offset = 1*itemsize, i.e. skip first element
# array([2, 3])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u></summary>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.ndarray.shape</u></summary>
# <blockquote>
# <code>
# Tuple of array dimensions.
#
# The shape property is usually used to get the current shape of an array,
# but may also be used to reshape the array in-place by assigning a tuple of
# array dimensions to it.  As with `numpy.reshape`, one of the new shape
# dimensions can be -1, in which case its value is inferred from the size of
# the array and the remaining dimensions. Reshaping an array in-place will
# fail if a copy is required.
#
# Examples
# --------
# >>> x = np.array([1, 2, 3, 4])
# >>> x.shape
# (4,)
# >>> y = np.zeros((2, 3, 4))
# >>> y.shape
# (2, 3, 4)
# >>> y.shape = (3, 8)
# >>> y
# array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
#        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
#        [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.]])
# >>> y.shape = (3, 6)
# Traceback (most recent call last):
#   File "<stdin>", line 1, in <module>
# ValueError: total size of new array must be unchanged
# >>> np.zeros((4,2))[::2].shape = (-1,)
# Traceback (most recent call last):
#   File "<stdin>", line 1, in <module>
# AttributeError: Incompatible shape for in-place modification. Use
# `.reshape()` to make a copy with the desired shape.
#
# See Also
# --------
# numpy.reshape : similar function
# ndarray.reshape : similar method
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas</u></summary>
# <blockquote>
# <code>
# pandas - a powerful data analysis and manipulation library for Python
# =====================================================================
#
# **pandas** is a Python package providing fast, flexible, and expressive data
# structures designed to make working with "relational" or "labeled" data both
# easy and intuitive. It aims to be the fundamental high-level building block for
# doing practical, **real world** data analysis in Python. Additionally, it has
# the broader goal of becoming **the most powerful and flexible open source data
# analysis / manipulation tool available in any language**. It is already well on
# its way toward this goal.
#
# Main Features
# -------------
# Here are just a few of the things that pandas does well:
#
#   - Easy handling of missing data in floating point as well as non-floating
#     point data.
#   - Size mutability: columns can be inserted and deleted from DataFrame and
#     higher dimensional objects
#   - Automatic and explicit data alignment: objects can be explicitly aligned
#     to a set of labels, or the user can simply ignore the labels and let
#     `Series`, `DataFrame`, etc. automatically align the data for you in
#     computations.
#   - Powerful, flexible group by functionality to perform split-apply-combine
#     operations on data sets, for both aggregating and transforming data.
#   - Make it easy to convert ragged, differently-indexed data in other Python
#     and NumPy data structures into DataFrame objects.
#   - Intelligent label-based slicing, fancy indexing, and subsetting of large
#     data sets.
#   - Intuitive merging and joining data sets.
#   - Flexible reshaping and pivoting of data sets.
#   - Hierarchical labeling of axes (possible to have multiple labels per tick).
#   - Robust IO tools for loading data from flat files (CSV and delimited),
#     Excel files, databases, and saving/loading data from the ultrafast HDF5
#     format.
#   - Time series-specific functionality: date range generation and frequency
#     conversion, moving window statistics, date shifting and lagging.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame</u></summary>
# <blockquote>
# <code>
# Two-dimensional, size-mutable, potentially heterogeneous tabular data.
#
# Data structure also contains labeled axes (rows and columns).
# Arithmetic operations align on both row and column labels. Can be
# thought of as a dict-like container for Series objects. The primary
# pandas data structure.
#
# Parameters
# ----------
# data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
#     Dict can contain Series, arrays, constants, dataclass or list-like objects. If
#     data is a dict, column order follows insertion-order. If a dict contains Series
#     which have an index defined, it is aligned by its index.
#
#     .. versionchanged:: 0.25.0
#        If data is a list of dicts, column order follows insertion-order.
#
# index : Index or array-like
#     Index to use for resulting frame. Will default to RangeIndex if
#     no indexing information is part of the input data and no index is provided.
# columns : Index or array-like
#     Column labels to use for resulting frame when data does not have them,
#     defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
#     will perform column selection instead.
# dtype : dtype, default None
#     Data type to force. Only a single dtype is allowed. If None, infer.
# copy : bool or None, default None
#     Copy data from inputs.
#     For dict data, the default of None behaves like ``copy=True``.  For DataFrame
#     or 2d ndarray input, the default of None behaves like ``copy=False``.
#
#     .. versionchanged:: 1.3.0
#
# See Also
# --------
# DataFrame.from_records : Constructor from tuples, also record arrays.
# DataFrame.from_dict : From dicts of Series, arrays, or dicts.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_table : Read general delimited file into DataFrame.
# read_clipboard : Read text from clipboard into DataFrame.
#
# Examples
# --------
# Constructing DataFrame from a dictionary.
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df
#    col1  col2
# 0     1     3
# 1     2     4
#
# Notice that the inferred dtype is int64.
#
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# To enforce a single dtype:
#
# >>> df = pd.DataFrame(data=d, dtype=np.int8)
# >>> df.dtypes
# col1    int8
# col2    int8
# dtype: object
#
# Constructing DataFrame from a dictionary including Series:
#
# >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
# >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
#    col1  col2
# 0     0   NaN
# 1     1   NaN
# 2     2   2.0
# 3     3   3.0
#
# Constructing DataFrame from numpy ndarray:
#
# >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
# ...                    columns=['a', 'b', 'c'])
# >>> df2
#    a  b  c
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
#
# Constructing DataFrame from a numpy ndarray that has labeled columns:
#
# >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
# ...                 dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")])
# >>> df3 = pd.DataFrame(data, columns=['c', 'a'])
# ...
# >>> df3
#    c  a
# 0  3  1
# 1  6  4
# 2  9  7
#
# Constructing DataFrame from dataclass:
#
# >>> from dataclasses import make_dataclass
# >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
# >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
#    x  y
# 0  0  0
# 1  0  3
# 2  2  3
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u></summary>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.head</u></summary>
# <blockquote>
# <code>
# Return the first `n` rows.
#
# This function returns the first `n` rows for the object based
# on position. It is useful for quickly testing if your object
# has the right type of data in it.
#
# For negative values of `n`, this function returns all rows except
# the last `n` rows, equivalent to ``df[:-n]``.
#
# Parameters
# ----------
# n : int, default 5
#     Number of rows to select.
#
# Returns
# -------
# same type as caller
#     The first `n` rows of the caller object.
#
# See Also
# --------
# DataFrame.tail: Returns the last `n` rows.
#
# Examples
# --------
# >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
# ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
# >>> df
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
# 6      shark
# 7      whale
# 8      zebra
#
# Viewing the first 5 lines
#
# >>> df.head()
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
#
# Viewing the first `n` lines (three in this case)
#
# >>> df.head(3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
#
# For negative values of `n`
#
# >>> df.head(-3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.to_csv</u></summary>
# <blockquote>
# <code>
# Write object to a comma-separated values (csv) file.
#
# Parameters
# ----------
# path_or_buf : str, path object, file-like object, or None, default None
#     String, path object (implementing os.PathLike[str]), or file-like
#     object implementing a write() function. If None, the result is
#     returned as a string. If a non-binary file object is passed, it should
#     be opened with `newline=''`, disabling universal newlines. If a binary
#     file object is passed, `mode` might need to contain a `'b'`.
#
#     .. versionchanged:: 1.2.0
#
#        Support for binary file objects was introduced.
#
# sep : str, default ','
#     String of length 1. Field delimiter for the output file.
# na_rep : str, default ''
#     Missing data representation.
# float_format : str, default None
#     Format string for floating point numbers.
# columns : sequence, optional
#     Columns to write.
# header : bool or list of str, default True
#     Write out the column names. If a list of strings is given it is
#     assumed to be aliases for the column names.
# index : bool, default True
#     Write row names (index).
# index_label : str or sequence, or False, default None
#     Column label for index column(s) if desired. If None is given, and
#     `header` and `index` are True, then the index names are used. A
#     sequence should be given if the object uses MultiIndex. If
#     False do not print fields for index names. Use index_label=False
#     for easier importing in R.
# mode : str
#     Python write mode, default 'w'.
# encoding : str, optional
#     A string representing the encoding to use in the output file,
#     defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
#     is a non-binary file object.
# compression : str or dict, default 'infer'
#     For on-the-fly compression of the output data. If 'infer' and 'path_or_buf'
#     is path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
#     ``None`` for no compression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdCompressor``, respectively. As an
#     example, the following could be passed for faster compression and to create
#     a reproducible gzip archive:
#     ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
#
#     .. versionchanged:: 1.0.0
#
#        May now be a dict with key 'method' as compression mode
#        and other entries as additional compression options if
#        compression mode is 'zip'.
#
#     .. versionchanged:: 1.1.0
#
#        Passing compression options as keys in dict is
#        supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.
#
#     .. versionchanged:: 1.2.0
#
#         Compression is supported for binary file objects.
#
#     .. versionchanged:: 1.2.0
#
#         Previous versions forwarded dict entries for 'gzip' to
#         `gzip.open` instead of `gzip.GzipFile` which prevented
#         setting `mtime`.
#
# quoting : optional constant from csv module
#     Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
#     then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
#     will treat them as non-numeric.
# quotechar : str, default '"'
#     String of length 1. Character used to quote fields.
# line_terminator : str, optional
#     The newline character or character sequence to use in the output
#     file. Defaults to `os.linesep`, which depends on the OS in which
#     this method is called (e.g. '\n' for Linux, '\r\n' for Windows).
# chunksize : int or None
#     Rows to write at a time.
# date_format : str, default None
#     Format string for datetime objects.
# doublequote : bool, default True
#     Control quoting of `quotechar` inside a field.
# escapechar : str, default None
#     String of length 1. Character used to escape `sep` and `quotechar`
#     when appropriate.
# decimal : str, default '.'
#     Character recognized as decimal separator. E.g. use ',' for
#     European data.
# errors : str, default 'strict'
#     Specifies how encoding and decoding errors are to be handled.
#     See the errors argument for :func:`open` for a full list
#     of options.
#
#     .. versionadded:: 1.1.0
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2.0
#
# Returns
# -------
# None or str
#     If path_or_buf is None, returns the resulting csv format as a
#     string. Otherwise returns None.
#
# See Also
# --------
# read_csv : Load a CSV file into a DataFrame.
# to_excel : Write DataFrame to an Excel file.
#
# Examples
# --------
# >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
# ...                    'mask': ['red', 'purple'],
# ...                    'weapon': ['sai', 'bo staff']})
# >>> df.to_csv(index=False)
# 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
#
# Create 'out.zip' containing 'out.csv'
#
# >>> compression_opts = dict(method='zip',
# ...                         archive_name='out.csv')  # doctest: +SKIP
# >>> df.to_csv('out.zip', index=False,
# ...           compression=compression_opts)  # doctest: +SKIP
#
# To write a csv file to a new folder or nested folder you will first
# need to create it using either Pathlib or os:
#
# >>> from pathlib import Path  # doctest: +SKIP
# >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
# >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv(filepath)  # doctest: +SKIP
#
# >>> import os  # doctest: +SKIP
# >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.io.parsers.readers.read_csv</u></summary>
# <blockquote>
# <code>
# Read a comma-separated values (csv) file into DataFrame.
#
# Also supports optionally iterating over the file or breaking it
# into chunks.
#
# Additional help can be found in the online docs for
# `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
#
# Parameters
# ----------
# filepath_or_buffer : str, path object or file-like object
#     Any valid string path is acceptable. The string could be a URL. Valid
#     URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
#     expected. A local file could be: file://localhost/path/to/table.csv.
#
#     If you want to pass in a path object, pandas accepts any ``os.PathLike``.
#
#     By file-like object, we refer to objects with a ``read()`` method, such as
#     a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
# sep : str, default ','
#     Delimiter to use. If sep is None, the C engine cannot automatically detect
#     the separator, but the Python parsing engine can, meaning the latter will
#     be used and automatically detect the separator by Python's builtin sniffer
#     tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
#     different from ``'\s+'`` will be interpreted as regular expressions and
#     will also force the use of the Python parsing engine. Note that regex
#     delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
# delimiter : str, default ``None``
#     Alias for sep.
# header : int, list of int, None, default 'infer'
#     Row number(s) to use as the column names, and the start of the
#     data.  Default behavior is to infer the column names: if no names
#     are passed the behavior is identical to ``header=0`` and column
#     names are inferred from the first line of the file, if column
#     names are passed explicitly then the behavior is identical to
#     ``header=None``. Explicitly pass ``header=0`` to be able to
#     replace existing names. The header can be a list of integers that
#     specify row locations for a multi-index on the columns
#     e.g. [0,1,3]. Intervening rows that are not specified will be
#     skipped (e.g. 2 in this example is skipped). Note that this
#     parameter ignores commented lines and empty lines if
#     ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
#     data rather than the first line of the file.
# names : array-like, optional
#     List of column names to use. If the file contains a header row,
#     then you should explicitly pass ``header=0`` to override the column names.
#     Duplicates in this list are not allowed.
# index_col : int, str, sequence of int / str, or False, optional, default ``None``
#     Column(s) to use as the row labels of the ``DataFrame``, either given as
#     string name or column index. If a sequence of int / str is given, a
#     MultiIndex is used.
#
#     Note: ``index_col=False`` can be used to force pandas to *not* use the first
#     column as the index, e.g. when you have a malformed file with delimiters at
#     the end of each line.
# usecols : list-like or callable, optional
#     Return a subset of the columns. If list-like, all elements must either
#     be positional (i.e. integer indices into the document columns) or strings
#     that correspond to column names provided either by the user in `names` or
#     inferred from the document header row(s). If ``names`` are given, the document
#     header row(s) are not taken into account. For example, a valid list-like
#     `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
#     Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
#     To instantiate a DataFrame from ``data`` with element order preserved use
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
#     in ``['foo', 'bar']`` order or
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
#     for ``['bar', 'foo']`` order.
#
#     If callable, the callable function will be evaluated against the column
#     names, returning names where the callable function evaluates to True. An
#     example of a valid callable argument would be ``lambda x: x.upper() in
#     ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
#     parsing time and lower memory usage.
# squeeze : bool, default False
#     If the parsed data only contains one column then return a Series.
#
#     .. deprecated:: 1.4.0
#         Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze
#         the data.
# prefix : str, optional
#     Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
#
#     .. deprecated:: 1.4.0
#        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
# mangle_dupe_cols : bool, default True
#     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
#     'X'...'X'. Passing in False will cause data to be overwritten if there
#     are duplicate names in the columns.
# dtype : Type name or dict of column -> type, optional
#     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,
#     'c': 'Int64'}
#     Use `str` or `object` together with suitable `na_values` settings
#     to preserve and not interpret dtype.
#     If converters are specified, they will be applied INSTEAD
#     of dtype conversion.
# engine : {'c', 'python', 'pyarrow'}, optional
#     Parser engine to use. The C and pyarrow engines are faster, while the python engine
#     is currently more feature-complete. Multithreading is currently only supported by
#     the pyarrow engine.
#
#     .. versionadded:: 1.4.0
#
#         The "pyarrow" engine was added as an *experimental* engine, and some features
#         are unsupported, or may not work correctly, with this engine.
# converters : dict, optional
#     Dict of functions for converting values in certain columns. Keys can either
#     be integers or column labels.
# true_values : list, optional
#     Values to consider as True.
# false_values : list, optional
#     Values to consider as False.
# skipinitialspace : bool, default False
#     Skip spaces after delimiter.
# skiprows : list-like, int or callable, optional
#     Line numbers to skip (0-indexed) or number of lines to skip (int)
#     at the start of the file.
#
#     If callable, the callable function will be evaluated against the row
#     indices, returning True if the row should be skipped and False otherwise.
#     An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
# skipfooter : int, default 0
#     Number of lines at bottom of file to skip (Unsupported with engine='c').
# nrows : int, optional
#     Number of rows of file to read. Useful for reading pieces of large files.
# na_values : scalar, str, list-like, or dict, optional
#     Additional strings to recognize as NA/NaN. If dict passed, specific
#     per-column NA values.  By default the following values are interpreted as
#     NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
#     '1.#IND', '1.#QNAN', '&lt;NA&gt;', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
#     'nan', 'null'.
# keep_default_na : bool, default True
#     Whether or not to include the default NaN values when parsing the data.
#     Depending on whether `na_values` is passed in, the behavior is as follows:
#
#     * If `keep_default_na` is True, and `na_values` are specified, `na_values`
#       is appended to the default NaN values used for parsing.
#     * If `keep_default_na` is True, and `na_values` are not specified, only
#       the default NaN values are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are specified, only
#       the NaN values specified in `na_values` are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are not specified, no
#       strings will be parsed as NaN.
#
#     Note that if `na_filter` is passed in as False, the `keep_default_na` and
#     `na_values` parameters will be ignored.
# na_filter : bool, default True
#     Detect missing value markers (empty strings and the value of na_values). In
#     data without any NAs, passing na_filter=False can improve the performance
#     of reading a large file.
# verbose : bool, default False
#     Indicate number of NA values placed in non-numeric columns.
# skip_blank_lines : bool, default True
#     If True, skip over blank lines rather than interpreting as NaN values.
# parse_dates : bool or list of int or names or list of lists or dict, default False
#     The behavior is as follows:
#
#     * boolean. If True -> try parsing the index.
#     * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
#       each as a separate date column.
#     * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
#       a single date column.
#     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
#       result 'foo'
#
#     If a column or index cannot be represented as an array of datetimes,
#     say because of an unparsable value or a mixture of timezones, the column
#     or index will be returned unaltered as an object data type. For
#     non-standard datetime parsing, use ``pd.to_datetime`` after
#     ``pd.read_csv``. To parse an index or column with a mixture of timezones,
#     specify ``date_parser`` to be a partially-applied
#     :func:`pandas.to_datetime` with ``utc=True``. See
#     :ref:`io.csv.mixed_timezones` for more.
#
#     Note: A fast-path exists for iso8601-formatted dates.
# infer_datetime_format : bool, default False
#     If True and `parse_dates` is enabled, pandas will attempt to infer the
#     format of the datetime strings in the columns, and if it can be inferred,
#     switch to a faster method of parsing them. In some cases this can increase
#     the parsing speed by 5-10x.
# keep_date_col : bool, default False
#     If True and `parse_dates` specifies combining multiple columns then
#     keep the original columns.
# date_parser : function, optional
#     Function to use for converting a sequence of string columns to an array of
#     datetime instances. The default uses ``dateutil.parser.parser`` to do the
#     conversion. Pandas will try to call `date_parser` in three different ways,
#     advancing to the next if an exception occurs: 1) Pass one or more arrays
#     (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
#     string values from the columns defined by `parse_dates` into a single array
#     and pass that; and 3) call `date_parser` once for each row using one or
#     more strings (corresponding to the columns defined by `parse_dates`) as
#     arguments.
# dayfirst : bool, default False
#     DD/MM format dates, international and European format.
# cache_dates : bool, default True
#     If True, use a cache of unique, converted dates to apply the datetime
#     conversion. May produce significant speed-up when parsing duplicate
#     date strings, especially ones with timezone offsets.
#
#     .. versionadded:: 0.25.0
# iterator : bool, default False
#     Return TextFileReader object for iteration or getting chunks with
#     ``get_chunk()``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# chunksize : int, optional
#     Return TextFileReader object for iteration.
#     See the `IO Tools docs
#     <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
#     for more information on ``iterator`` and ``chunksize``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# compression : str or dict, default 'infer'
#     For on-the-fly decompression of on-disk data. If 'infer' and `filepath_or_buffer` is
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
#     'zip', the ZIP file must contain only one data file to be read in. Set to
#     ``None`` for no decompression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for Zstandard decompression using a
#     custom compression dictionary:
#     ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
#
#     .. versionchanged:: 1.4.0 Zstandard support.
#
# thousands : str, optional
#     Thousands separator.
# decimal : str, default '.'
#     Character to recognize as decimal point (e.g. use ',' for European data).
# lineterminator : str (length 1), optional
#     Character to break file into lines. Only valid with C parser.
# quotechar : str (length 1), optional
#     The character used to denote the start and end of a quoted item. Quoted
#     items can include the delimiter and it will be ignored.
# quoting : int or csv.QUOTE_* instance, default 0
#     Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
#     QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
# doublequote : bool, default ``True``
#    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
#    whether or not to interpret two consecutive quotechar elements INSIDE a
#    field as a single ``quotechar`` element.
# escapechar : str (length 1), optional
#     One-character string used to escape other characters.
# comment : str, optional
#     Indicates remainder of line should not be parsed. If found at the beginning
#     of a line, the line will be ignored altogether. This parameter must be a
#     single character. Like empty lines (as long as ``skip_blank_lines=True``),
#     fully commented lines are ignored by the parameter `header` but not by
#     `skiprows`. For example, if ``comment='#'``, parsing
#     ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
#     treated as the header.
# encoding : str, optional
#     Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
#     standard encodings
#     <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
#
#     .. versionchanged:: 1.2
#
#        When ``encoding`` is ``None``, ``errors="replace"`` is passed to
#        ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
#        This behavior was previously only the case for ``engine="python"``.
#
#     .. versionchanged:: 1.3.0
#
#        ``encoding_errors`` is a new argument. ``encoding`` no longer has an
#        influence on how encoding errors are handled.
#
# encoding_errors : str, optional, default "strict"
#     How encoding errors are treated. `List of possible values
#     <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
#
#     .. versionadded:: 1.3.0
#
# dialect : str or csv.Dialect, optional
#     If provided, this parameter will override values (default or not) for the
#     following parameters: `delimiter`, `doublequote`, `escapechar`,
#     `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
#     override values, a ParserWarning will be issued. See csv.Dialect
#     documentation for more details.
# error_bad_lines : bool, optional, default ``None``
#     Lines with too many fields (e.g. a csv line with too many commas) will by
#     default cause an exception to be raised, and no DataFrame will be returned.
#     If False, then these "bad lines" will be dropped from the DataFrame that is
#     returned.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# warn_bad_lines : bool, optional, default ``None``
#     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
#     "bad line" will be output.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# on_bad_lines : {'error', 'warn', 'skip'} or callable, default 'error'
#     Specifies what to do upon encountering a bad line (a line with too many fields).
#     Allowed values are :
#
#         - 'error', raise an Exception when a bad line is encountered.
#         - 'warn', raise a warning when a bad line is encountered and skip that line.
#         - 'skip', skip bad lines without raising or warning when they are encountered.
#
#     .. versionadded:: 1.3.0
#
#         - callable, function with signature
#           ``(bad_line: list[str]) -> list[str] | None`` that will process a single
#           bad line. ``bad_line`` is a list of strings split by the ``sep``.
#           If the function returns ``None``, the bad line will be ignored.
#           If the function returns a new list of strings with more elements than
#           expected, a ``ParserWarning`` will be emitted while dropping extra elements.
#           Only supported when ``engine="python"``
#
#     .. versionadded:: 1.4.0
#
# delim_whitespace : bool, default False
#     Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
#     used as the sep. Equivalent to setting ``sep='\s+'``. If this option
#     is set to True, nothing should be passed in for the ``delimiter``
#     parameter.
# low_memory : bool, default True
#     Internally process the file in chunks, resulting in lower memory use
#     while parsing, but possibly mixed type inference.  To ensure no mixed
#     types either set False, or specify the type with the `dtype` parameter.
#     Note that the entire file is read into a single DataFrame regardless,
#     use the `chunksize` or `iterator` parameter to return the data in chunks.
#     (Only valid with C parser).
# memory_map : bool, default False
#     If a filepath is provided for `filepath_or_buffer`, map the file object
#     directly onto memory and access the data directly from there. Using this
#     option can improve performance because there is no longer any I/O overhead.
# float_precision : str, optional
#     Specifies which converter the C engine should use for floating-point
#     values. The options are ``None`` or 'high' for the ordinary converter,
#     'legacy' for the original lower precision pandas converter, and
#     'round_trip' for the round-trip converter.
#
#     .. versionchanged:: 1.2
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2
#
# Returns
# -------
# DataFrame or TextFileReader
#     A comma-separated values (csv) file is returned as a two-dimensional
#     data structure with labeled axes.
#
# See Also
# --------
# DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_fwf : Read a table of fixed-width formatted lines into DataFrame.
#
# Examples
# --------
# >>> pd.read_csv('data.csv')  # doctest: +SKIP
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.plotting._misc.autocorrelation_plot</u></summary>
# <blockquote>
# <code>
# Autocorrelation plot for time series.
#
# Parameters
# ----------
# series : Time series
# ax : Matplotlib axis object, optional
# **kwargs
#     Options to pass to matplotlib plotting method.
#
# Returns
# -------
# :class:`matplotlib.axes.Axes`
#
# Examples
# --------
#
# The horizontal lines in the plot correspond to 95% and 99% confidence bands.
#
# The dashed line is the 99% confidence band.
#
# .. plot::
#     :context: close-figs
#
#     >>> spacing = np.linspace(-9 * np.pi, 9 * np.pi, num=1000)
#     >>> s = pd.Series(0.7 * np.random.rand(1000) + 0.3 * np.sin(spacing))
#     >>> pd.plotting.autocorrelation_plot(s)
#     <AxesSubplot:title={'center':'width'}, xlabel='Lag', ylabel='Autocorrelation'>
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>plotly</b>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u></summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u></summary>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._scatter.Scatter</u></summary>
# <blockquote>
# <code>
# Base class for all trace types.
#
# Specific trace type classes (Scatter, Bar, etc.) are code generated as
# subclasses of this class.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline</u></summary>
# <blockquote>
# <code>
# offline
# ======
# This module provides offline functionality.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u></summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner of
#                             the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>scipy</b>
# <ul>
# <li>
# <details><summary><u>scipy.stats</u></summary>
# <blockquote>
# <code>
# .. _statsrefmanual:
#
# ==========================================
# Statistical functions (:mod:`scipy.stats`)
# ==========================================
#
# .. currentmodule:: scipy.stats
#
# This module contains a large number of probability distributions,
# summary and frequency statistics, correlation functions and statistical
# tests, masked statistics, kernel density estimation, quasi-Monte Carlo
# functionality, and more.
#
# Statistics is a very large area, and there are topics that are out of scope
# for SciPy and are covered by other packages. Some of the most important ones
# are:
#
# - `statsmodels <https://www.statsmodels.org/stable/index.html>`__:
#   regression, linear models, time series analysis, extensions to topics
#   also covered by ``scipy.stats``.
# - `Pandas <https://pandas.pydata.org/>`__: tabular data, time series
#   functionality, interfaces to other statistical languages.
# - `PyMC <https://docs.pymc.io/>`__: Bayesian statistical
#   modeling, probabilistic machine learning.
# - `scikit-learn <https://scikit-learn.org/>`__: classification, regression,
#   model selection.
# - `Seaborn <https://seaborn.pydata.org/>`__: statistical data visualization.
# - `rpy2 <https://rpy2.github.io/>`__: Python to R bridge.
#
#
# Probability distributions
# =========================
#
# Each univariate distribution is an instance of a subclass of `rv_continuous`
# (`rv_discrete` for discrete distributions):
#
# .. autosummary::
#    :toctree: generated/
#
#    rv_continuous
#    rv_discrete
#    rv_histogram
#
# Continuous distributions
# ------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    alpha             -- Alpha
#    anglit            -- Anglit
#    arcsine           -- Arcsine
#    argus             -- Argus
#    beta              -- Beta
#    betaprime         -- Beta Prime
#    bradford          -- Bradford
#    burr              -- Burr (Type III)
#    burr12            -- Burr (Type XII)
#    cauchy            -- Cauchy
#    chi               -- Chi
#    chi2              -- Chi-squared
#    cosine            -- Cosine
#    crystalball       -- Crystalball
#    dgamma            -- Double Gamma
#    dweibull          -- Double Weibull
#    erlang            -- Erlang
#    expon             -- Exponential
#    exponnorm         -- Exponentially Modified Normal
#    exponweib         -- Exponentiated Weibull
#    exponpow          -- Exponential Power
#    f                 -- F (Snedecor F)
#    fatiguelife       -- Fatigue Life (Birnbaum-Saunders)
#    fisk              -- Fisk
#    foldcauchy        -- Folded Cauchy
#    foldnorm          -- Folded Normal
#    genlogistic       -- Generalized Logistic
#    gennorm           -- Generalized normal
#    genpareto         -- Generalized Pareto
#    genexpon          -- Generalized Exponential
#    genextreme        -- Generalized Extreme Value
#    gausshyper        -- Gauss Hypergeometric
#    gamma             -- Gamma
#    gengamma          -- Generalized gamma
#    genhalflogistic   -- Generalized Half Logistic
#    genhyperbolic     -- Generalized Hyperbolic
#    geninvgauss       -- Generalized Inverse Gaussian
#    gilbrat           -- Gilbrat
#    gompertz          -- Gompertz (Truncated Gumbel)
#    gumbel_r          -- Right Sided Gumbel, Log-Weibull, Fisher-Tippett, Extreme Value Type I
#    gumbel_l          -- Left Sided Gumbel, etc.
#    halfcauchy        -- Half Cauchy
#    halflogistic      -- Half Logistic
#    halfnorm          -- Half Normal
#    halfgennorm       -- Generalized Half Normal
#    hypsecant         -- Hyperbolic Secant
#    invgamma          -- Inverse Gamma
#    invgauss          -- Inverse Gaussian
#    invweibull        -- Inverse Weibull
#    johnsonsb         -- Johnson SB
#    johnsonsu         -- Johnson SU
#    kappa4            -- Kappa 4 parameter
#    kappa3            -- Kappa 3 parameter
#    ksone             -- Distribution of Kolmogorov-Smirnov one-sided test statistic
#    kstwo             -- Distribution of Kolmogorov-Smirnov two-sided test statistic
#    kstwobign         -- Limiting Distribution of scaled Kolmogorov-Smirnov two-sided test statistic.
#    laplace           -- Laplace
#    laplace_asymmetric    -- Asymmetric Laplace
#    levy              -- Levy
#    levy_l
#    levy_stable
#    logistic          -- Logistic
#    loggamma          -- Log-Gamma
#    loglaplace        -- Log-Laplace (Log Double Exponential)
#    lognorm           -- Log-Normal
#    loguniform        -- Log-Uniform
#    lomax             -- Lomax (Pareto of the second kind)
#    maxwell           -- Maxwell
#    mielke            -- Mielke's Beta-Kappa
#    moyal             -- Moyal
#    nakagami          -- Nakagami
#    ncx2              -- Non-central chi-squared
#    ncf               -- Non-central F
#    nct               -- Non-central Student's T
#    norm              -- Normal (Gaussian)
#    norminvgauss      -- Normal Inverse Gaussian
#    pareto            -- Pareto
#    pearson3          -- Pearson type III
#    powerlaw          -- Power-function
#    powerlognorm      -- Power log normal
#    powernorm         -- Power normal
#    rdist             -- R-distribution
#    rayleigh          -- Rayleigh
#    rice              -- Rice
#    recipinvgauss     -- Reciprocal Inverse Gaussian
#    semicircular      -- Semicircular
#    skewcauchy        -- Skew Cauchy
#    skewnorm          -- Skew normal
#    studentized_range    -- Studentized Range
#    t                 -- Student's T
#    trapezoid         -- Trapezoidal
#    triang            -- Triangular
#    truncexpon        -- Truncated Exponential
#    truncnorm         -- Truncated Normal
#    tukeylambda       -- Tukey-Lambda
#    uniform           -- Uniform
#    vonmises          -- Von-Mises (Circular)
#    vonmises_line     -- Von-Mises (Line)
#    wald              -- Wald
#    weibull_min       -- Minimum Weibull (see Frechet)
#    weibull_max       -- Maximum Weibull (see Frechet)
#    wrapcauchy        -- Wrapped Cauchy
#
# Multivariate distributions
# --------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    multivariate_normal    -- Multivariate normal distribution
#    matrix_normal          -- Matrix normal distribution
#    dirichlet              -- Dirichlet
#    wishart                -- Wishart
#    invwishart             -- Inverse Wishart
#    multinomial            -- Multinomial distribution
#    special_ortho_group    -- SO(N) group
#    ortho_group            -- O(N) group
#    unitary_group          -- U(N) group
#    random_correlation     -- random correlation matrices
#    multivariate_t         -- Multivariate t-distribution
#    multivariate_hypergeom -- Multivariate hypergeometric distribution
#
# Discrete distributions
# ----------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    bernoulli                -- Bernoulli
#    betabinom                -- Beta-Binomial
#    binom                    -- Binomial
#    boltzmann                -- Boltzmann (Truncated Discrete Exponential)
#    dlaplace                 -- Discrete Laplacian
#    geom                     -- Geometric
#    hypergeom                -- Hypergeometric
#    logser                   -- Logarithmic (Log-Series, Series)
#    nbinom                   -- Negative Binomial
#    nchypergeom_fisher       -- Fisher's Noncentral Hypergeometric
#    nchypergeom_wallenius    -- Wallenius's Noncentral Hypergeometric
#    nhypergeom               -- Negative Hypergeometric
#    planck                   -- Planck (Discrete Exponential)
#    poisson                  -- Poisson
#    randint                  -- Discrete Uniform
#    skellam                  -- Skellam
#    yulesimon                -- Yule-Simon
#    zipf                     -- Zipf (Zeta)
#    zipfian                  -- Zipfian
#
# An overview of statistical functions is given below.  Many of these functions
# have a similar version in `scipy.stats.mstats` that works for masked arrays.
#
# Summary statistics
# ==================
#
# .. autosummary::
#    :toctree: generated/
#
#    describe          -- Descriptive statistics
#    gmean             -- Geometric mean
#    hmean             -- Harmonic mean
#    kurtosis          -- Fisher or Pearson kurtosis
#    mode              -- Modal value
#    moment            -- Central moment
#    skew              -- Skewness
#    kstat             --
#    kstatvar          --
#    tmean             -- Truncated arithmetic mean
#    tvar              -- Truncated variance
#    tmin              --
#    tmax              --
#    tstd              --
#    tsem              --
#    variation         -- Coefficient of variation
#    find_repeats
#    trim_mean
#    gstd              -- Geometric Standard Deviation
#    iqr
#    sem
#    bayes_mvs
#    mvsdist
#    entropy
#    differential_entropy
#    median_absolute_deviation
#    median_abs_deviation
#    bootstrap
#
# Frequency statistics
# ====================
#
# .. autosummary::
#    :toctree: generated/
#
#    cumfreq
#    itemfreq
#    percentileofscore
#    scoreatpercentile
#    relfreq
#
# .. autosummary::
#    :toctree: generated/
#
#    binned_statistic     -- Compute a binned statistic for a set of data.
#    binned_statistic_2d  -- Compute a 2-D binned statistic for a set of data.
#    binned_statistic_dd  -- Compute a d-D binned statistic for a set of data.
#
# Correlation functions
# =====================
#
# .. autosummary::
#    :toctree: generated/
#
#    f_oneway
#    alexandergovern
#    pearsonr
#    spearmanr
#    pointbiserialr
#    kendalltau
#    weightedtau
#    somersd
#    linregress
#    siegelslopes
#    theilslopes
#    multiscale_graphcorr
#
# Statistical tests
# =================
#
# .. autosummary::
#    :toctree: generated/
#
#    ttest_1samp
#    ttest_ind
#    ttest_ind_from_stats
#    ttest_rel
#    chisquare
#    cramervonmises
#    cramervonmises_2samp
#    power_divergence
#    kstest
#    ks_1samp
#    ks_2samp
#    epps_singleton_2samp
#    mannwhitneyu
#    tiecorrect
#    rankdata
#    ranksums
#    wilcoxon
#    kruskal
#    friedmanchisquare
#    brunnermunzel
#    combine_pvalues
#    jarque_bera
#    page_trend_test
#    permutation_test
#    tukey_hsd
#
# .. autosummary::
#    :toctree: generated/
#
#    ansari
#    bartlett
#    levene
#    shapiro
#    anderson
#    anderson_ksamp
#    binom_test
#    binomtest
#    fligner
#    median_test
#    mood
#    skewtest
#    kurtosistest
#    normaltest
#
#
# Quasi-Monte Carlo
# =================
#
# .. toctree::
#    :maxdepth: 4
#
#    stats.qmc
#
#
# Masked statistics functions
# ===========================
#
# .. toctree::
#
#    stats.mstats
#
#
# Other statistical functionality
# ===============================
#
# Transformations
# ---------------
#
# .. autosummary::
#    :toctree: generated/
#
#    boxcox
#    boxcox_normmax
#    boxcox_llf
#    yeojohnson
#    yeojohnson_normmax
#    yeojohnson_llf
#    obrientransform
#    sigmaclip
#    trimboth
#    trim1
#    zmap
#    zscore
#    gzscore
#
# Statistical distances
# ---------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    wasserstein_distance
#    energy_distance
#
# Sampling
# --------
#
# .. toctree::
#    :maxdepth: 4
#
#    stats.sampling
#
# Random variate generation / CDF Inversion
# -----------------------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    rvs_ratio_uniforms
#    NumericalInverseHermite
#
# Circular statistical functions
# ------------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    circmean
#    circvar
#    circstd
#
# Contingency table functions
# ---------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    chi2_contingency
#    contingency.crosstab
#    contingency.expected_freq
#    contingency.margins
#    contingency.relative_risk
#    contingency.association
#    fisher_exact
#    barnard_exact
#    boschloo_exact
#
# Plot-tests
# ----------
#
# .. autosummary::
#    :toctree: generated/
#
#    ppcc_max
#    ppcc_plot
#    probplot
#    boxcox_normplot
#    yeojohnson_normplot
#
# Univariate and multivariate kernel density estimation
# -----------------------------------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    gaussian_kde
#
# Warnings / Errors used in :mod:`scipy.stats`
# --------------------------------------------
#
# .. autosummary::
#    :toctree: generated/
#
#    F_onewayConstantInputWarning
#    F_onewayBadInputSizesWarning
#    PearsonRConstantInputWarning
#    PearsonRNearConstantInputWarning
#    SpearmanRConstantInputWarning
#    BootstrapDegenerateDistributionWarning
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>seaborn</b>
# <ul>
# <li>
# <details><summary><u>seaborn.categorical.barplot</u></summary>
# <blockquote>
# <code>
# Show point estimates and confidence intervals as rectangular bars.
#
# A bar plot represents an estimate of central tendency for a numeric
# variable with the height of each rectangle and provides some indication of
# the uncertainty around that estimate using error bars. Bar plots include 0
# in the quantitative axis range, and they are a good choice when 0 is a
# meaningful value for the quantitative variable, and you want to make
# comparisons against it.
#
# For datasets where 0 is not a meaningful value, a point plot will allow you
# to focus on differences between levels of one or more categorical
# variables.
#
# It is also important to keep in mind that a bar plot shows only the mean
# (or other estimator) value, but in many cases it may be more informative to
# show the distribution of values at each level of the categorical variables.
# In that case, other approaches such as a box or violin plot may be more
# appropriate.
#
#
# Input data can be passed in a variety of formats, including:
#
# - Vectors of data represented as lists, numpy arrays, or pandas Series
#   objects passed directly to the ``x``, ``y``, and/or ``hue`` parameters.
# - A "long-form" DataFrame, in which case the ``x``, ``y``, and ``hue``
#   variables will determine how the data are plotted.
# - A "wide-form" DataFrame, such that each numeric column will be plotted.
# - An array or list of vectors.
#
# In most cases, it is possible to use numpy or Python objects, but pandas
# objects are preferable because the associated names will be used to
# annotate the axes. Additionally, you can use Categorical types for the
# grouping variables to control the order of plot elements.    
#
# This function always treats one of the variables as categorical and
# draws data at ordinal positions (0, 1, ... n) on the relevant axis, even
# when the data has a numeric or date type.
#
# See the :ref:`tutorial <categorical_tutorial>` for more information.    
#
# Parameters
# ----------
# x, y, hue : names of variables in ``data`` or vector data, optional
#     Inputs for plotting long-form data. See examples for interpretation.        
# data : DataFrame, array, or list of arrays, optional
#     Dataset for plotting. If ``x`` and ``y`` are absent, this is
#     interpreted as wide-form. Otherwise it is expected to be long-form.    
# order, hue_order : lists of strings, optional
#     Order to plot the categorical levels in, otherwise the levels are
#     inferred from the data objects.        
# estimator : callable that maps vector -> scalar, optional
#     Statistical function to estimate within each categorical bin.
# ci : float or "sd" or None, optional
#     Size of confidence intervals to draw around estimated values.  If
#     "sd", skip bootstrapping and draw the standard deviation of the
#     observations. If ``None``, no bootstrapping will be performed, and
#     error bars will not be drawn.
# n_boot : int, optional
#     Number of bootstrap iterations to use when computing confidence
#     intervals.
# units : name of variable in ``data`` or vector data, optional
#     Identifier of sampling units, which will be used to perform a
#     multilevel bootstrap and account for repeated measures design.
# seed : int, numpy.random.Generator, or numpy.random.RandomState, optional
#     Seed or random number generator for reproducible bootstrapping.    
# orient : "v" | "h", optional
#     Orientation of the plot (vertical or horizontal). This is usually
#     inferred based on the type of the input variables, but it can be used
#     to resolve ambiguity when both `x` and `y` are numeric or when
#     plotting wide-form data.    
# color : matplotlib color, optional
#     Color for all of the elements, or seed for a gradient palette.    
# palette : palette name, list, or dict
#     Colors to use for the different levels of the ``hue`` variable. Should
#     be something that can be interpreted by :func:`color_palette`, or a
#     dictionary mapping hue levels to matplotlib colors.    
# saturation : float, optional
#     Proportion of the original saturation to draw colors at. Large patches
#     often look better with slightly desaturated colors, but set this to
#     ``1`` if you want the plot colors to perfectly match the input color
#     spec.    
# errcolor : matplotlib color
#     Color for the lines that represent the confidence interval.
# errwidth : float, optional
#     Thickness of error bar lines (and caps).         
# capsize : float, optional
#     Width of the "caps" on error bars.
#
# dodge : bool, optional
#     When hue nesting is used, whether elements should be shifted along the
#     categorical axis.    
# ax : matplotlib Axes, optional
#     Axes object to draw the plot onto, otherwise uses the current Axes.    
# kwargs : key, value mappings
#     Other keyword arguments are passed through to
#     :meth:`matplotlib.axes.Axes.bar`.
#
# Returns
# -------
# ax : matplotlib Axes
#     Returns the Axes object with the plot drawn onto it.    
#
# See Also
# --------
# countplot : Show the counts of observations in each categorical bin.    
# pointplot : Show point estimates and confidence intervals using scatterplot
#             glyphs.    
# catplot : Combine a categorical plot with a :class:`FacetGrid`.    
#
# Examples
# --------
#
# Draw a set of vertical bar plots grouped by a categorical variable:
#
# .. plot::
#     :context: close-figs
#
#     >>> import seaborn as sns
#     >>> sns.set_theme(style="whitegrid")
#     >>> tips = sns.load_dataset("tips")
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips)
#
# Draw a set of vertical bars with nested grouping by two variables:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips)
#
# Draw a set of horizontal bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="tip", y="day", data=tips)
#
# Control bar order by passing an explicit order:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="time", y="tip", data=tips,
#     ...                  order=["Dinner", "Lunch"])
#
# Use median as the estimate of central tendency:
#
# .. plot::
#     :context: close-figs
#
#     >>> from numpy import median
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, estimator=median)
#
# Show the standard error of the mean with the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci=68)
#
# Show standard deviation of observations instead of a confidence interval:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci="sd")
#
# Add "caps" to the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, capsize=.2)
#
# Use a different color palette for the bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  palette="Blues_d")
#
# Use ``hue`` without changing bar position or width:
#
# .. plot::
#     :context: close-figs
#
#     >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="weekend",
#     ...                  data=tips, dodge=False)
#
# Plot all bars in a single color:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  color="salmon", saturation=.5)
#
# Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style.
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips,
#     ...                  linewidth=2.5, facecolor=(1, 1, 1, 0),
#     ...                  errcolor=".2", edgecolor=".2")
#
# Use :func:`catplot` to combine a :func:`barplot` and a :class:`FacetGrid`.
# This allows grouping within additional categorical variables. Using
# :func:`catplot` is safer than using :class:`FacetGrid` directly, as it
# ensures synchronization of variable order across facets:
#
# .. plot::
#     :context: close-figs
#
#     >>> g = sns.catplot(x="sex", y="total_bill",
#     ...                 hue="smoker", col="time",
#     ...                 data=tips, kind="bar",
#     ...                 height=4, aspect=.7);
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>sklearn</b>
# <ul>
# <li>
# <details><summary><u>sklearn.model_selection._split.KFold</u></summary>
# <blockquote>
# <code>
# K-Folds cross-validator
#
# Provides train/test indices to split data in train/test sets. Split
# dataset into k consecutive folds (without shuffling by default).
#
# Each fold is then used once as a validation while the k - 1 remaining
# folds form the training set.
#
# Read more in the :ref:`User Guide <k_fold>`.
#
# Parameters
# ----------
# n_splits : int, default=5
#     Number of folds. Must be at least 2.
#
#     .. versionchanged:: 0.22
#         ``n_splits`` default value changed from 3 to 5.
#
# shuffle : bool, default=False
#     Whether to shuffle the data before splitting into batches.
#     Note that the samples within each split will not be shuffled.
#
# random_state : int, RandomState instance or None, default=None
#     When `shuffle` is True, `random_state` affects the ordering of the
#     indices, which controls the randomness of each fold. Otherwise, this
#     parameter has no effect.
#     Pass an int for reproducible output across multiple function calls.
#     See :term:`Glossary <random_state>`.
#
# Examples
# --------
# >>> import numpy as np
# >>> from sklearn.model_selection import KFold
# >>> X = np.array([[1, 2], [3, 4], [1, 2], [3, 4]])
# >>> y = np.array([1, 2, 3, 4])
# >>> kf = KFold(n_splits=2)
# >>> kf.get_n_splits(X)
# 2
# >>> print(kf)
# KFold(n_splits=2, random_state=None, shuffle=False)
# >>> for train_index, test_index in kf.split(X):
# ...     print("TRAIN:", train_index, "TEST:", test_index)
# ...     X_train, X_test = X[train_index], X[test_index]
# ...     y_train, y_test = y[train_index], y[test_index]
# TRAIN: [2 3] TEST: [0 1]
# TRAIN: [0 1] TEST: [2 3]
#
# Notes
# -----
# The first ``n_samples % n_splits`` folds have size
# ``n_samples // n_splits + 1``, other folds have size
# ``n_samples // n_splits``, where ``n_samples`` is the number of samples.
#
# Randomized CV splitters may return different results for each call of
# split. You can make the results identical by setting `random_state`
# to an integer.
#
# See Also
# --------
# StratifiedKFold : Takes group information into account to avoid building
#     folds with imbalanced class distributions (for binary or multiclass
#     classification tasks).
#
# GroupKFold : K-fold iterator variant with non-overlapping groups.
#
# RepeatedKFold : Repeats K-Fold n times.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.model_selection._split.train_test_split</u></summary>
# <blockquote>
# <code>
# Split arrays or matrices into random train and test subsets.
#
# Quick utility that wraps input validation and
# ``next(ShuffleSplit().split(X, y))`` and application to input data
# into a single call for splitting (and optionally subsampling) data in a
# oneliner.
#
# Read more in the :ref:`User Guide <cross_validation>`.
#
# Parameters
# ----------
# *arrays : sequence of indexables with same length / shape[0]
#     Allowed inputs are lists, numpy arrays, scipy-sparse
#     matrices or pandas dataframes.
#
# test_size : float or int, default=None
#     If float, should be between 0.0 and 1.0 and represent the proportion
#     of the dataset to include in the test split. If int, represents the
#     absolute number of test samples. If None, the value is set to the
#     complement of the train size. If ``train_size`` is also None, it will
#     be set to 0.25.
#
# train_size : float or int, default=None
#     If float, should be between 0.0 and 1.0 and represent the
#     proportion of the dataset to include in the train split. If
#     int, represents the absolute number of train samples. If None,
#     the value is automatically set to the complement of the test size.
#
# random_state : int, RandomState instance or None, default=None
#     Controls the shuffling applied to the data before applying the split.
#     Pass an int for reproducible output across multiple function calls.
#     See :term:`Glossary <random_state>`.
#
# shuffle : bool, default=True
#     Whether or not to shuffle the data before splitting. If shuffle=False
#     then stratify must be None.
#
# stratify : array-like, default=None
#     If not None, data is split in a stratified fashion, using this as
#     the class labels.
#     Read more in the :ref:`User Guide <stratification>`.
#
# Returns
# -------
# splitting : list, length=2 * len(arrays)
#     List containing train-test split of inputs.
#
#     .. versionadded:: 0.16
#         If the input is sparse, the output will be a
#         ``scipy.sparse.csr_matrix``. Else, output type is the same as the
#         input type.
#
# Examples
# --------
# >>> import numpy as np
# >>> from sklearn.model_selection import train_test_split
# >>> X, y = np.arange(10).reshape((5, 2)), range(5)
# >>> X
# array([[0, 1],
#        [2, 3],
#        [4, 5],
#        [6, 7],
#        [8, 9]])
# >>> list(y)
# [0, 1, 2, 3, 4]
#
# >>> X_train, X_test, y_train, y_test = train_test_split(
# ...     X, y, test_size=0.33, random_state=42)
# ...
# >>> X_train
# array([[4, 5],
#        [0, 1],
#        [6, 7]])
# >>> y_train
# [2, 0, 3]
# >>> X_test
# array([[2, 3],
#        [8, 9]])
# >>> y_test
# [1, 4]
#
# >>> train_test_split(y, shuffle=False)
# [[0, 1, 2], [3, 4]]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.model_selection._validation.cross_val_score</u></summary>
# <blockquote>
# <code>
# Evaluate a score by cross-validation.
#
# Read more in the :ref:`User Guide <cross_validation>`.
#
# Parameters
# ----------
# estimator : estimator object implementing 'fit'
#     The object to use to fit the data.
#
# X : array-like of shape (n_samples, n_features)
#     The data to fit. Can be for example a list, or an array.
#
# y : array-like of shape (n_samples,) or (n_samples, n_outputs), default=None
#     The target variable to try to predict in the case of
#     supervised learning.
#
# groups : array-like of shape (n_samples,), default=None
#     Group labels for the samples used while splitting the dataset into
#     train/test set. Only used in conjunction with a "Group" :term:`cv`
#     instance (e.g., :class:`GroupKFold`).
#
# scoring : str or callable, default=None
#     A str (see model evaluation documentation) or
#     a scorer callable object / function with signature
#     ``scorer(estimator, X, y)`` which should return only
#     a single value.
#
#     Similar to :func:`cross_validate`
#     but only a single metric is permitted.
#
#     If `None`, the estimator's default scorer (if available) is used.
#
# cv : int, cross-validation generator or an iterable, default=None
#     Determines the cross-validation splitting strategy.
#     Possible inputs for cv are:
#
#     - `None`, to use the default 5-fold cross validation,
#     - int, to specify the number of folds in a `(Stratified)KFold`,
#     - :term:`CV splitter`,
#     - An iterable that generates (train, test) splits as arrays of indices.
#
#     For `int`/`None` inputs, if the estimator is a classifier and `y` is
#     either binary or multiclass, :class:`StratifiedKFold` is used. In all
#     other cases, :class:`KFold` is used. These splitters are instantiated
#     with `shuffle=False` so the splits will be the same across calls.
#
#     Refer to the :ref:`User Guide <cross_validation>` for the various
#     cross-validation strategies that can be used here.
#
#     .. versionchanged:: 0.22
#         `cv` default value if `None` changed from 3-fold to 5-fold.
#
# n_jobs : int, default=None
#     Number of jobs to run in parallel. Training the estimator and computing
#     the score are parallelized over the cross-validation splits.
#     ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
#     ``-1`` means using all processors. See :term:`Glossary <n_jobs>`
#     for more details.
#
# verbose : int, default=0
#     The verbosity level.
#
# fit_params : dict, default=None
#     Parameters to pass to the fit method of the estimator.
#
# pre_dispatch : int or str, default='2*n_jobs'
#     Controls the number of jobs that get dispatched during parallel
#     execution. Reducing this number can be useful to avoid an
#     explosion of memory consumption when more jobs get dispatched
#     than CPUs can process. This parameter can be:
#
#         - ``None``, in which case all the jobs are immediately
#           created and spawned. Use this for lightweight and
#           fast-running jobs, to avoid delays due to on-demand
#           spawning of the jobs
#
#         - An int, giving the exact number of total jobs that are
#           spawned
#
#         - A str, giving an expression as a function of n_jobs,
#           as in '2*n_jobs'
#
# error_score : 'raise' or numeric, default=np.nan
#     Value to assign to the score if an error occurs in estimator fitting.
#     If set to 'raise', the error is raised.
#     If a numeric value is given, FitFailedWarning is raised.
#
#     .. versionadded:: 0.20
#
# Returns
# -------
# scores : ndarray of float of shape=(len(list(cv)),)
#     Array of scores of the estimator for each run of the cross validation.
#
# Examples
# --------
# >>> from sklearn import datasets, linear_model
# >>> from sklearn.model_selection import cross_val_score
# >>> diabetes = datasets.load_diabetes()
# >>> X = diabetes.data[:150]
# >>> y = diabetes.target[:150]
# >>> lasso = linear_model.Lasso()
# >>> print(cross_val_score(lasso, X, y, cv=3))
# [0.33150734 0.08022311 0.03531764]
#
# See Also
# --------
# cross_validate : To run cross-validation on multiple metrics and also to
#     return train scores, fit times and score times.
#
# cross_val_predict : Get predictions from each split of cross-validation for
#     diagnostic purposes.
#
# sklearn.metrics.make_scorer : Make a scorer from a performance metric or
#     loss function.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler</u></summary>
# <blockquote>
# <code>
# Transform features by scaling each feature to a given range.
#
# This estimator scales and translates each feature individually such
# that it is in the given range on the training set, e.g. between
# zero and one.
#
# The transformation is given by::
#
#     X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
#     X_scaled = X_std * (max - min) + min
#
# where min, max = feature_range.
#
# This transformation is often used as an alternative to zero mean,
# unit variance scaling.
#
# Read more in the :ref:`User Guide <preprocessing_scaler>`.
#
# Parameters
# ----------
# feature_range : tuple (min, max), default=(0, 1)
#     Desired range of transformed data.
#
# copy : bool, default=True
#     Set to False to perform inplace row normalization and avoid a
#     copy (if the input is already a numpy array).
#
# clip : bool, default=False
#     Set to True to clip transformed values of held-out data to
#     provided `feature range`.
#
#     .. versionadded:: 0.24
#
# Attributes
# ----------
# min_ : ndarray of shape (n_features,)
#     Per feature adjustment for minimum. Equivalent to
#     ``min - X.min(axis=0) * self.scale_``
#
# scale_ : ndarray of shape (n_features,)
#     Per feature relative scaling of the data. Equivalent to
#     ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
#
#     .. versionadded:: 0.17
#        *scale_* attribute.
#
# data_min_ : ndarray of shape (n_features,)
#     Per feature minimum seen in the data
#
#     .. versionadded:: 0.17
#        *data_min_*
#
# data_max_ : ndarray of shape (n_features,)
#     Per feature maximum seen in the data
#
#     .. versionadded:: 0.17
#        *data_max_*
#
# data_range_ : ndarray of shape (n_features,)
#     Per feature range ``(data_max_ - data_min_)`` seen in the data
#
#     .. versionadded:: 0.17
#        *data_range_*
#
# n_features_in_ : int
#     Number of features seen during :term:`fit`.
#
#     .. versionadded:: 0.24
#
# n_samples_seen_ : int
#     The number of samples processed by the estimator.
#     It will be reset on new calls to fit, but increments across
#     ``partial_fit`` calls.
#
# feature_names_in_ : ndarray of shape (`n_features_in_`,)
#     Names of features seen during :term:`fit`. Defined only when `X`
#     has feature names that are all strings.
#
#     .. versionadded:: 1.0
#
# See Also
# --------
# minmax_scale : Equivalent function without the estimator API.
#
# Notes
# -----
# NaNs are treated as missing values: disregarded in fit, and maintained in
# transform.
#
# For a comparison of the different scalers, transformers, and normalizers,
# see :ref:`examples/preprocessing/plot_all_scaling.py
# <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
#
# Examples
# --------
# >>> from sklearn.preprocessing import MinMaxScaler
# >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
# >>> scaler = MinMaxScaler()
# >>> print(scaler.fit(data))
# MinMaxScaler()
# >>> print(scaler.data_max_)
# [ 1. 18.]
# >>> print(scaler.transform(data))
# [[0.   0.  ]
#  [0.25 0.25]
#  [0.5  0.5 ]
#  [1.   1.  ]]
# >>> print(scaler.transform([[2, 2]]))
# [[1.5 0. ]]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler.fit</u></summary>
# <blockquote>
# <code>
# Compute the minimum and maximum to be used for later scaling.
#
# Parameters
# ----------
# X : array-like of shape (n_samples, n_features)
#     The data used to compute the per-feature minimum and maximum
#     used for later scaling along the features axis.
#
# y : None
#     Ignored.
#
# Returns
# -------
# self : object
#     Fitted scaler.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>statsmodels</b>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u></summary>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional arguments that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.arima_model.ARIMA</u></summary>
# <blockquote>
# <code>
# ARIMA has been deprecated in favor of the new implementation
#
# See Also
# --------
# statsmodels.tsa.arima.model.ARIMA
#     ARIMA models with a variety of parameter estimators
# statsmodels.tsa.statespace.SARIMAX
#     SARIMAX models estimated using MLE
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.statespace.sarimax.SARIMAX</u></summary>
# <blockquote>
# <code>
# Seasonal AutoRegressive Integrated Moving Average with eXogenous regressors
# model
#
# Parameters
# ----------
# endog : array_like
#     The observed time-series process :math:`y`
# exog : array_like, optional
#     Array of exogenous regressors, shaped nobs x k.
# order : iterable or iterable of iterables, optional
#     The (p,d,q) order of the model for the number of AR parameters,
#     differences, and MA parameters. `d` must be an integer
#     indicating the integration order of the process, while
#     `p` and `q` may either be integers indicating the AR and MA
#     orders (so that all lags up to those orders are included) or else
#     iterables giving specific AR and / or MA lags to include. Default is
#     an AR(1) model: (1,0,0).
# seasonal_order : iterable, optional
#     The (P,D,Q,s) order of the seasonal component of the model for the
#     AR parameters, differences, MA parameters, and periodicity.
#     `D` must be an integer indicating the integration order of the process,
#     while `P` and `Q` may either be integers indicating the AR and MA
#     orders (so that all lags up to those orders are included) or else
#     iterables giving specific AR and / or MA lags to include. `s` is an
#     integer giving the periodicity (number of periods in season), often it
#     is 4 for quarterly data or 12 for monthly data. Default is no seasonal
#     effect.
# trend : str{'n','c','t','ct'} or iterable, optional
#     Parameter controlling the deterministic trend polynomial :math:`A(t)`.
#     Can be specified as a string where 'c' indicates a constant (i.e. a
#     degree zero component of the trend polynomial), 't' indicates a
#     linear trend with time, and 'ct' is both. Can also be specified as an
#     iterable defining the non-zero polynomial exponents to include, in
#     increasing order. For example, `[1,1,0,1]` denotes
#     :math:`a + bt + ct^3`. Default is to not include a trend component.
# measurement_error : bool, optional
#     Whether or not to assume the endogenous observations `endog` were
#     measured with error. Default is False.
# time_varying_regression : bool, optional
#     Used when explanatory variables, `exog`, are provided
#     to select whether or not coefficients on the exogenous regressors are
#     allowed to vary over time. Default is False.
# mle_regression : bool, optional
#     Whether or not to estimate the regression coefficients for the
#     exogenous variables as part of maximum likelihood estimation or through
#     the Kalman filter (i.e. recursive least squares). If
#     `time_varying_regression` is True, this must be set to False. Default
#     is True.
# simple_differencing : bool, optional
#     Whether or not to use partially conditional maximum likelihood
#     estimation. If True, differencing is performed prior to estimation,
#     which discards the first :math:`s D + d` initial rows but results in a
#     smaller state-space formulation. See the Notes section for important
#     details about interpreting results when this option is used. If False,
#     the full SARIMAX model is put in state-space form so that all
#     datapoints can be used in estimation. Default is False.
# enforce_stationarity : bool, optional
#     Whether or not to transform the AR parameters to enforce stationarity
#     in the autoregressive component of the model. Default is True.
# enforce_invertibility : bool, optional
#     Whether or not to transform the MA parameters to enforce invertibility
#     in the moving average component of the model. Default is True.
# hamilton_representation : bool, optional
#     Whether or not to use the Hamilton representation of an ARMA process
#     (if True) or the Harvey representation (if False). Default is False.
# concentrate_scale : bool, optional
#     Whether or not to concentrate the scale (variance of the error term)
#     out of the likelihood. This reduces the number of parameters estimated
#     by maximum likelihood by one, but standard errors will then not
#     be available for the scale parameter.
# trend_offset : int, optional
#     The offset at which to start time trend values. Default is 1, so that
#     if `trend='t'` the trend is equal to 1, 2, ..., nobs. Typically is only
#     set when the model is created by extending a previous dataset.
# use_exact_diffuse : bool, optional
#     Whether or not to use exact diffuse initialization for non-stationary
#     states. Default is False (in which case approximate diffuse
#     initialization is used).
# **kwargs
#     Keyword arguments may be used to provide default values for state space
#     matrices or for Kalman filtering options. See `Representation`, and
#     `KalmanFilter` for more details.
#
# Attributes
# ----------
# measurement_error : bool
#     Whether or not to assume the endogenous
#     observations `endog` were measured with error.
# state_error : bool
#     Whether or not the transition equation has an error component.
# mle_regression : bool
#     Whether or not the regression coefficients for
#     the exogenous variables were estimated via maximum
#     likelihood estimation.
# state_regression : bool
#     Whether or not the regression coefficients for
#     the exogenous variables are included as elements
#     of the state space and estimated via the Kalman
#     filter.
# time_varying_regression : bool
#     Whether or not coefficients on the exogenous
#     regressors are allowed to vary over time.
# simple_differencing : bool
#     Whether or not to use partially conditional maximum likelihood
#     estimation.
# enforce_stationarity : bool
#     Whether or not to transform the AR parameters
#     to enforce stationarity in the autoregressive
#     component of the model.
# enforce_invertibility : bool
#     Whether or not to transform the MA parameters
#     to enforce invertibility in the moving average
#     component of the model.
# hamilton_representation : bool
#     Whether or not to use the Hamilton representation of an ARMA process.
# trend : str{'n','c','t','ct'} or iterable
#     Parameter controlling the deterministic
#     trend polynomial :math:`A(t)`. See the class
#     parameter documentation for more information.
# polynomial_ar : ndarray
#     Array containing autoregressive lag polynomial lags, ordered from
#     lowest degree to highest. The polynomial begins with lag 0.
#     Initialized with ones, unless a coefficient is constrained to be
#     zero (in which case it is zero).
# polynomial_ma : ndarray
#     Array containing moving average lag polynomial lags, ordered from
#     lowest degree to highest. Initialized with ones, unless a coefficient
#     is constrained to be zero (in which case it is zero).
# polynomial_seasonal_ar : ndarray
#     Array containing seasonal autoregressive lag
#     polynomial lags, ordered from lowest degree
#     to highest. Initialized with ones, unless a
#     coefficient is constrained to be zero (in which
#     case it is zero).
# polynomial_seasonal_ma : ndarray
#     Array containing seasonal moving average lag
#     polynomial lags, ordered from lowest degree
#     to highest. Initialized with ones, unless a
#     coefficient is constrained to be zero (in which
#     case it is zero).
# polynomial_trend : ndarray
#     Array containing trend polynomial coefficients,
#     ordered from lowest degree to highest. Initialized
#     with ones, unless a coefficient is constrained to be
#     zero (in which case it is zero).
# k_ar : int
#     Highest autoregressive order in the model, zero-indexed.
# k_ar_params : int
#     Number of autoregressive parameters to be estimated.
# k_diff : int
#     Order of integration.
# k_ma : int
#     Highest moving average order in the model, zero-indexed.
# k_ma_params : int
#     Number of moving average parameters to be estimated.
# seasonal_periods : int
#     Number of periods in a season.
# k_seasonal_ar : int
#     Highest seasonal autoregressive order in the model, zero-indexed.
# k_seasonal_ar_params : int
#     Number of seasonal autoregressive parameters to be estimated.
# k_seasonal_diff : int
#     Order of seasonal integration.
# k_seasonal_ma : int
#     Highest seasonal moving average order in the model, zero-indexed.
# k_seasonal_ma_params : int
#     Number of seasonal moving average parameters to be estimated.
# k_trend : int
#     Order of the trend polynomial plus one (i.e. the constant polynomial
#     would have `k_trend=1`).
# k_exog : int
#     Number of exogenous regressors.
#
# Notes
# -----
# The SARIMA model is specified :math:`(p, d, q) \times (P, D, Q)_s`.
#
# .. math::
#
#     \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D y_t = A(t) +
#         \theta_q (L) \tilde \theta_Q (L^s) \zeta_t
#
# In terms of a univariate structural model, this can be represented as
#
# .. math::
#
#     y_t & = u_t + \eta_t \\
#     \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D u_t & = A(t) +
#         \theta_q (L) \tilde \theta_Q (L^s) \zeta_t
#
# where :math:`\eta_t` is only applicable in the case of measurement error
# (although it is also used in the case of a pure regression model, i.e. if
# p=q=0).
#
# In terms of this model, regression with SARIMA errors can be represented
# easily as
#
# .. math::
#
#     y_t & = \beta_t x_t + u_t \\
#     \phi_p (L) \tilde \phi_P (L^s) \Delta^d \Delta_s^D u_t & = A(t) +
#         \theta_q (L) \tilde \theta_Q (L^s) \zeta_t
#
# this model is the one used when exogenous regressors are provided.
#
# Note that the reduced form lag polynomials will be written as:
#
# .. math::
#
#     \Phi (L) \equiv \phi_p (L) \tilde \phi_P (L^s) \\
#     \Theta (L) \equiv \theta_q (L) \tilde \theta_Q (L^s)
#
# If `mle_regression` is True, regression coefficients are treated as
# additional parameters to be estimated via maximum likelihood. Otherwise
# they are included as part of the state with a diffuse initialization.
# In this case, however, with approximate diffuse initialization, results
# can be sensitive to the initial variance.
#
# This class allows two different underlying representations of ARMA models
# as state space models: that of Hamilton and that of Harvey. Both are
# equivalent in the sense that they are analytical representations of the
# ARMA model, but the state vectors of each have different meanings. For
# this reason, maximum likelihood does not result in identical parameter
# estimates and even the same set of parameters will result in different
# loglikelihoods.
#
# The Harvey representation is convenient because it allows integrating
# differencing into the state vector to allow using all observations for
# estimation.
#
# In this implementation of differenced models, the Hamilton representation
# is not able to accommodate differencing in the state vector, so
# `simple_differencing` (which performs differencing prior to estimation so
# that the first d + sD observations are lost) must be used.
#
# Many other packages use the Hamilton representation, so that tests against
# Stata and R require using it along with simple differencing (as Stata
# does).
#
# If `filter_concentrated = True` is used, then the scale of the model is
# concentrated out of the likelihood. A benefit of this is that the
# dimension of the parameter vector is reduced so that numerical maximization
# of the log-likelihood function may be faster and more stable. If this
# option is used in a model with measurement error, it is important to note
# that the estimated measurement error parameter will be relative to the scale,
# and is named "snr.measurement_error" instead of "var.measurement_error". To
# compute the variance of the measurement error in this case one would
# multiply the `snr.measurement_error` parameter by the scale.
#
# If `simple_differencing = True` is used, then the `endog` and `exog` data
# are differenced prior to putting the model in state-space form. This has
# the same effect as if the user differenced the data prior to constructing
# the model, which has implications for using the results:
#
# - Forecasts and predictions will be about the *differenced* data, not about
#   the original data. (while if `simple_differencing = False` is used, then
#   forecasts and predictions will be about the original data).
# - If the original data has an Int64Index, a new RangeIndex will be created
#   for the differenced data that starts from one, and forecasts and
#   predictions will use this new index.
#
# Detailed information about state space models can be found in [1]_. Some
# specific references are:
#
# - Chapter 3.4 describes ARMA and ARIMA models in state space form (using
#   the Harvey representation), and gives references for basic seasonal
#   models and models with a multiplicative form (for example the airline
#   model). It also shows a state space model for a full ARIMA process (this
#   is what is done here if `simple_differencing=False`).
# - Chapter 3.6 describes estimating regression effects via the Kalman filter
#   (this is performed if `mle_regression` is False), regression with
#   time-varying coefficients, and regression with ARMA errors (recall from
#   above that if regression effects are present, the model estimated by this
#   class is regression with SARIMA errors).
# - Chapter 8.4 describes the application of an ARMA model to an example
#   dataset. A replication of this section is available in an example
#   IPython notebook in the documentation.
#
# References
# ----------
# .. [1] Durbin, James, and Siem Jan Koopman. 2012.
#    Time Series Analysis by State Space Methods: Second Edition.
#    Oxford University Press.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.stattools.acf</u></summary>
# <blockquote>
# <code>
# Calculate the autocorrelation function.
#
# Parameters
# ----------
# x : array_like
#    The time series data.
# adjusted : bool, default False
#    If True, then denominators for autocovariance are n-k, otherwise n.
# nlags : int, optional
#     Number of lags to return autocorrelation for. If not provided,
#     uses min(10 * np.log10(nobs), nobs - 1). The returned value
#     includes lag 0 (ie., 1) so size of the acf vector is (nlags + 1,).
# qstat : bool, default False
#     If True, returns the Ljung-Box q statistic for each autocorrelation
#     coefficient.  See q_stat for more information.
# fft : bool, default True
#     If True, computes the ACF via FFT.
# alpha : scalar, default None
#     If a number is given, the confidence intervals for the given level are
#     returned. For instance if alpha=.05, 95 % confidence intervals are
#     returned where the standard deviation is computed according to
#     Bartlett's formula.
# bartlett_confint : bool, default True
#     Confidence intervals for ACF values are generally placed at 2
#     standard errors around r_k. The formula used for standard error
#     depends upon the situation. If the autocorrelations are being used
#     to test for randomness of residuals as part of the ARIMA routine,
#     the standard errors are determined assuming the residuals are white
#     noise. The approximate formula for any lag is that standard error
#     of each r_k = 1/sqrt(N). See section 9.4 of [2] for more details on
#     the 1/sqrt(N) result. For more elementary discussion, see section 5.3.2
#     in [3].
#     For the ACF of raw data, the standard error at a lag k is
#     found as if the right model was an MA(k-1). This allows the possible
#     interpretation that if all autocorrelations past a certain lag are
#     within the limits, the model might be an MA of order defined by the
#     last significant autocorrelation. In this case, a moving average
#     model is assumed for the data and the standard errors for the
#     confidence intervals should be generated using Bartlett's formula.
#     For more details on Bartlett formula result, see section 7.2 in [2].
# missing : str, default "none"
#     A string in ["none", "raise", "conservative", "drop"] specifying how
#     the NaNs are to be treated. "none" performs no checks. "raise" raises
#     an exception if NaN values are found. "drop" removes the missing
#     observations and then estimates the autocovariances treating the
#     non-missing as contiguous. "conservative" computes the autocovariance
#     using nan-ops so that nans are removed when computing the mean
#     and cross-products that are used to estimate the autocovariance.
#     When using "conservative", n is set to the number of non-missing
#     observations.
#
# Returns
# -------
# acf : ndarray
#     The autocorrelation function for lags 0, 1, ..., nlags. Shape
#     (nlags+1,).
# confint : ndarray, optional
#     Confidence intervals for the ACF at lags 0, 1, ..., nlags. Shape
#     (nlags + 1, 2). Returned if alpha is not None.
# qstat : ndarray, optional
#     The Ljung-Box Q-Statistic for lags 1, 2, ..., nlags (excludes lag
#     zero). Returned if qstat is True.
# pvalues : ndarray, optional
#     The p-values associated with the Q-statistics for lags 1, 2, ...,
#     nlags (excludes lag zero). Returned if qstat is True.
#
# Notes
# -----
# The acf at lag 0 (ie., 1) is returned.
#
# For very long time series it is recommended to use fft convolution instead.
# When fft is False uses a simple, direct estimator of the autocovariances
# that only computes the first nlag + 1 values. This can be much faster when
# the time series is long and only a small number of autocovariances are
# needed.
#
# If adjusted is true, the denominator for the autocovariance is adjusted
# for the loss of data.
#
# References
# ----------
# .. [1] Parzen, E., 1963. On spectral analysis with missing observations
#    and amplitude modulation. Sankhya: The Indian Journal of
#    Statistics, Series A, pp.383-392.
# .. [2] Brockwell and Davis, 1987. Time Series Theory and Methods
# .. [3] Brockwell and Davis, 2010. Introduction to Time Series and
#    Forecasting, 2nd edition.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.stattools.adfuller</u></summary>
# <blockquote>
# <code>
# Augmented Dickey-Fuller unit root test.
#
# The Augmented Dickey-Fuller test can be used to test for a unit root in a
# univariate process in the presence of serial correlation.
#
# Parameters
# ----------
# x : array_like, 1d
#     The data series to test.
# maxlag : {None, int}
#     Maximum lag which is included in test, default value of
#     12*(nobs/100)^{1/4} is used when ``None``.
# regression : {"c","ct","ctt","n"}
#     Constant and trend order to include in regression.
#
#     * "c" : constant only (default).
#     * "ct" : constant and trend.
#     * "ctt" : constant, and linear and quadratic trend.
#     * "n" : no constant, no trend.
#
# autolag : {"AIC", "BIC", "t-stat", None}
#     Method to use when automatically determining the lag length among the
#     values 0, 1, ..., maxlag.
#
#     * If "AIC" (default) or "BIC", then the number of lags is chosen
#       to minimize the corresponding information criterion.
#     * "t-stat" based choice of maxlag.  Starts with maxlag and drops a
#       lag until the t-statistic on the last lag length is significant
#       using a 5%-sized test.
#     * If None, then the number of included lags is set to maxlag.
# store : bool
#     If True, then a result instance is returned additionally to
#     the adf statistic. Default is False.
# regresults : bool, optional
#     If True, the full regression results are returned. Default is False.
#
# Returns
# -------
# adf : float
#     The test statistic.
# pvalue : float
#     MacKinnon's approximate p-value based on MacKinnon (1994, 2010).
# usedlag : int
#     The number of lags used.
# nobs : int
#     The number of observations used for the ADF regression and calculation
#     of the critical values.
# critical values : dict
#     Critical values for the test statistic at the 1 %, 5 %, and 10 %
#     levels. Based on MacKinnon (2010).
# icbest : float
#     The maximized information criterion if autolag is not None.
# resstore : ResultStore, optional
#     A dummy class with results attached as attributes.
#
# Notes
# -----
# The null hypothesis of the Augmented Dickey-Fuller is that there is a unit
# root, with the alternative that there is no unit root. If the pvalue is
# above a critical size, then we cannot reject that there is a unit root.
#
# The p-values are obtained through regression surface approximation from
# MacKinnon 1994, but using the updated 2010 tables. If the p-value is close
# to significant, then the critical values should be used to judge whether
# to reject the null.
#
# The autolag option and maxlag for it are described in Greene.
#
# References
# ----------
# .. [1] W. Greene.  "Econometric Analysis," 5th ed., Pearson, 2003.
#
# .. [2] Hamilton, J.D.  "Time Series Analysis".  Princeton, 1994.
#
# .. [3] MacKinnon, J.G. 1994.  "Approximate asymptotic distribution functions for
#     unit-root and cointegration tests."  `Journal of Business and Economic
#     Statistics` 12, 167-76.
#
# .. [4] MacKinnon, J.G. 2010. "Critical Values for Cointegration Tests."  Queen's
#     University, Dept of Economics, Working Papers.  Available at
#     http://ideas.repec.org/p/qed/wpaper/1227.html
#
# Examples
# --------
# See example notebook
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.stattools.arma_order_select_ic</u></summary>
# <blockquote>
# <code>
# Compute information criteria for many ARMA models.
#
# Parameters
# ----------
# y : array_like
#     Array of time-series data.
# max_ar : int
#     Maximum number of AR lags to use. Default 4.
# max_ma : int
#     Maximum number of MA lags to use. Default 2.
# ic : str, list
#     Information criteria to report. Either a single string or a list
#     of different criteria is possible.
# trend : str
#     The trend to use when fitting the ARMA models.
# model_kw : dict
#     Keyword arguments to be passed to the ``ARMA`` model.
# fit_kw : dict
#     Keyword arguments to be passed to ``ARMA.fit``.
#
# Returns
# -------
# Bunch
#     Dict-like object with attribute access. Each ic is an attribute with a
#     DataFrame for the results. The AR order used is the row index. The MA
#     order used is the column index. The minimum orders are available as
#     ``ic_min_order``.
#
# Notes
# -----
# This method can be used to tentatively identify the order of an ARMA
# process, provided that the time series is stationary and invertible. This
# function computes the full exact MLE estimate of each model and can,
# therefore, be a little slow. An implementation using approximate estimates
# will be provided in the future. In the meantime, consider passing
# {method : "css"} to fit_kw.
#
# Examples
# --------
#
# >>> from statsmodels.tsa.arima_process import arma_generate_sample
# >>> import statsmodels.api as sm
# >>> import numpy as np
#
# >>> arparams = np.array([.75, -.25])
# >>> maparams = np.array([.65, .35])
# >>> arparams = np.r_[1, -arparams]
# >>> maparams = np.r_[1, maparams]
# >>> nobs = 250
# >>> np.random.seed(2014)
# >>> y = arma_generate_sample(arparams, maparams, nobs)
# >>> res = sm.tsa.arma_order_select_ic(y, ic=["aic", "bic"], trend="n")
# >>> res.aic_min_order
# >>> res.bic_min_order
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>statsmodels.tsa.stattools.pacf</u></summary>
# <blockquote>
# <code>
# Partial autocorrelation estimate.
#
# Parameters
# ----------
# x : array_like
#     Observations of time series for which pacf is calculated.
# nlags : int, optional
#     Number of lags to return autocorrelation for. If not provided,
#     uses min(10 * np.log10(nobs), nobs // 2 - 1). The returned value
#     includes lag 0 (ie., 1) so size of the pacf vector is (nlags + 1,).
# method : str, default "ywadjusted"
#     Specifies which method for the calculations to use.
#
#     - "yw" or "ywadjusted" : Yule-Walker with sample-size adjustment in
#       denominator for acovf. Default.
#     - "ywm" or "ywmle" : Yule-Walker without adjustment.
#     - "ols" : regression of time series on lags of it and on constant.
#     - "ols-inefficient" : regression of time series on lags using a single
#       common sample to estimate all pacf coefficients.
#     - "ols-adjusted" : regression of time series on lags with a bias
#       adjustment.
#     - "ld" or "ldadjusted" : Levinson-Durbin recursion with bias
#       correction.
#     - "ldb" or "ldbiased" : Levinson-Durbin recursion without bias
#       correction.
#
# alpha : float, optional
#     If a number is given, the confidence intervals for the given level are
#     returned. For instance if alpha=.05, 95 % confidence intervals are
#     returned where the standard deviation is computed according to
#     1/sqrt(len(x)).
#
# Returns
# -------
# pacf : ndarray
#     The partial autocorrelations for lags 0, 1, ..., nlags. Shape
#     (nlags+1,).
# confint : ndarray, optional
#     Confidence intervals for the PACF at lags 0, 1, ..., nlags. Shape
#     (nlags + 1, 2). Returned if alpha is not None.
#
# See Also
# --------
# statsmodels.tsa.stattools.acf
#     Estimate the autocorrelation function.
# statsmodels.tsa.stattools.pacf
#     Partial autocorrelation estimation.
# statsmodels.tsa.stattools.pacf_yw
#      Partial autocorrelation estimation using Yule-Walker.
# statsmodels.tsa.stattools.pacf_ols
#     Partial autocorrelation estimation using OLS.
# statsmodels.tsa.stattools.pacf_burg
#     Partial autocorrelation estimation using Burg's method.
#
# Notes
# -----
# Based on simulation evidence across a range of low-order ARMA models,
# the best methods based on root MSE are Yule-Walker (MLW), Levinson-Durbin
# (MLE) and Burg, respectively. The estimators with the lowest bias included
# these three in addition to OLS and OLS-adjusted.
#
# Yule-Walker (adjusted) and Levinson-Durbin (adjusted) performed
# consistently worse than the other options.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>warnings</b>
# <ul>
# <li>
# <details><summary><u>warnings.filterwarnings</u></summary>
# <blockquote>
# <code>
# Insert an entry into the list of warnings filters (at the front).
#
# 'action' -- one of "error", "ignore", "always", "default", "module",
#             or "once"
# 'message' -- a regex that the warning message must match
# 'category' -- a class that the warning must be a subclass of
# 'module' -- a regex that the module name must match
# 'lineno' -- an integer line number, 0 matches all warnings
# 'append' -- if true, append to the list of filters
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>1. Library Loading</h1>  <a id='1'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>warnings</h2>
# <ul>
# <li>
# <details><summary><u>warnings.filterwarnings</u></summary>
# <blockquote>
# <code>
# Insert an entry into the list of warnings filters (at the front).
#
# 'action' -- one of "error", "ignore", "always", "default", "module",
#             or "once"
# 'message' -- a regex that the warning message must match
# 'category' -- a class that the warning must be a subclass of
# 'module' -- a regex that the module name must match
# 'lineno' -- an integer line number, 0 matches all warnings
# 'append' -- if true, append to the list of filters
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% _uuid="8f2839f25d086af736a60e9eeb907d3b93b6e0e5" _cell_guid="b1076dfc-b9ad-4769-8c92-a6c4dae69d19"
# Library loading for the notebook: numerical/data handling, plotting,
# time-series modelling (statsmodels) and statistics (scipy).

# --- Numerical and data-handling libraries ---
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import random as rd # generating random numbers
import datetime # manipulating date formats

# --- Plotting libraries ---
import matplotlib.pyplot as plt # basic plotting
import seaborn as sns # for prettier plots

# --- Time-series modelling and diagnostics ---
# NOTE(review): `statsmodels.tsa.arima_model` appears to be the legacy ARIMA
# location; newer statsmodels releases document `statsmodels.tsa.arima.model`
# as the replacement -- confirm against the installed statsmodels version
# before relying on this class.
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pandas.plotting import autocorrelation_plot
from statsmodels.tsa.stattools import adfuller, acf, pacf,arma_order_select_ic
import statsmodels.formula.api as smf
import statsmodels.tsa.api as smt
import statsmodels.api as sm
import scipy.stats as scs

# --- Warning control ---
import warnings
# Insert a blanket "ignore" entry at the front of the warnings filter list, so
# warnings raised after this point are suppressed (no message, category or
# module restriction is given).
warnings.filterwarnings("ignore")


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>2. Data Preparation</h1>  <a id='2'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.io.parsers.readers.read_csv</u></summary>
# <blockquote>
# <code>
# Read a comma-separated values (csv) file into DataFrame.
#
# Also supports optionally iterating or breaking of the file
# into chunks.
#
# Additional help can be found in the online docs for
# `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
#
# Parameters
# ----------
# filepath_or_buffer : str, path object or file-like object
#     Any valid string path is acceptable. The string could be a URL. Valid
#     URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
#     expected. A local file could be: file://localhost/path/to/table.csv.
#
#     If you want to pass in a path object, pandas accepts any ``os.PathLike``.
#
#     By file-like object, we refer to objects with a ``read()`` method, such as
#     a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
# sep : str, default ','
#     Delimiter to use. If sep is None, the C engine cannot automatically detect
#     the separator, but the Python parsing engine can, meaning the latter will
#     be used and automatically detect the separator by Python's builtin sniffer
#     tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
#     different from ``'\s+'`` will be interpreted as regular expressions and
#     will also force the use of the Python parsing engine. Note that regex
#     delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
# delimiter : str, default ``None``
#     Alias for sep.
# header : int, list of int, None, default 'infer'
#     Row number(s) to use as the column names, and the start of the
#     data.  Default behavior is to infer the column names: if no names
#     are passed the behavior is identical to ``header=0`` and column
#     names are inferred from the first line of the file, if column
#     names are passed explicitly then the behavior is identical to
#     ``header=None``. Explicitly pass ``header=0`` to be able to
#     replace existing names. The header can be a list of integers that
#     specify row locations for a multi-index on the columns
#     e.g. [0,1,3]. Intervening rows that are not specified will be
#     skipped (e.g. 2 in this example is skipped). Note that this
#     parameter ignores commented lines and empty lines if
#     ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
#     data rather than the first line of the file.
# names : array-like, optional
#     List of column names to use. If the file contains a header row,
#     then you should explicitly pass ``header=0`` to override the column names.
#     Duplicates in this list are not allowed.
# index_col : int, str, sequence of int / str, or False, optional, default ``None``
#   Column(s) to use as the row labels of the ``DataFrame``, either given as
#   string name or column index. If a sequence of int / str is given, a
#   MultiIndex is used.
#
#   Note: ``index_col=False`` can be used to force pandas to *not* use the first
#   column as the index, e.g. when you have a malformed file with delimiters at
#   the end of each line.
# usecols : list-like or callable, optional
#     Return a subset of the columns. If list-like, all elements must either
#     be positional (i.e. integer indices into the document columns) or strings
#     that correspond to column names provided either by the user in `names` or
#     inferred from the document header row(s). If ``names`` are given, the document
#     header row(s) are not taken into account. For example, a valid list-like
#     `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
#     Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
#     To instantiate a DataFrame from ``data`` with element order preserved use
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
#     in ``['foo', 'bar']`` order or
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
#     for ``['bar', 'foo']`` order.
#
#     If callable, the callable function will be evaluated against the column
#     names, returning names where the callable function evaluates to True. An
#     example of a valid callable argument would be ``lambda x: x.upper() in
#     ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
#     parsing time and lower memory usage.
# squeeze : bool, default False
#     If the parsed data only contains one column then return a Series.
#
#     .. deprecated:: 1.4.0
#         Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze
#         the data.
# prefix : str, optional
#     Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
#
#     .. deprecated:: 1.4.0
#        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
# mangle_dupe_cols : bool, default True
#     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
#     'X'...'X'. Passing in False will cause data to be overwritten if there
#     are duplicate names in the columns.
# dtype : Type name or dict of column -> type, optional
#     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,
#     'c': 'Int64'}
#     Use `str` or `object` together with suitable `na_values` settings
#     to preserve and not interpret dtype.
#     If converters are specified, they will be applied INSTEAD
#     of dtype conversion.
# engine : {'c', 'python', 'pyarrow'}, optional
#     Parser engine to use. The C and pyarrow engines are faster, while the python engine
#     is currently more feature-complete. Multithreading is currently only supported by
#     the pyarrow engine.
#
#     .. versionadded:: 1.4.0
#
#         The "pyarrow" engine was added as an *experimental* engine, and some features
#         are unsupported, or may not work correctly, with this engine.
# converters : dict, optional
#     Dict of functions for converting values in certain columns. Keys can either
#     be integers or column labels.
# true_values : list, optional
#     Values to consider as True.
# false_values : list, optional
#     Values to consider as False.
# skipinitialspace : bool, default False
#     Skip spaces after delimiter.
# skiprows : list-like, int or callable, optional
#     Line numbers to skip (0-indexed) or number of lines to skip (int)
#     at the start of the file.
#
#     If callable, the callable function will be evaluated against the row
#     indices, returning True if the row should be skipped and False otherwise.
#     An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
# skipfooter : int, default 0
#     Number of lines at bottom of file to skip (Unsupported with engine='c').
# nrows : int, optional
#     Number of rows of file to read. Useful for reading pieces of large files.
# na_values : scalar, str, list-like, or dict, optional
#     Additional strings to recognize as NA/NaN. If dict passed, specific
#     per-column NA values.  By default the following values are interpreted as
#     NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
#     '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
#     'nan', 'null'.
# keep_default_na : bool, default True
#     Whether or not to include the default NaN values when parsing the data.
#     Depending on whether `na_values` is passed in, the behavior is as follows:
#
#     * If `keep_default_na` is True, and `na_values` are specified, `na_values`
#       is appended to the default NaN values used for parsing.
#     * If `keep_default_na` is True, and `na_values` are not specified, only
#       the default NaN values are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are specified, only
#       the NaN values specified `na_values` are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are not specified, no
#       strings will be parsed as NaN.
#
#     Note that if `na_filter` is passed in as False, the `keep_default_na` and
#     `na_values` parameters will be ignored.
# na_filter : bool, default True
#     Detect missing value markers (empty strings and the value of na_values). In
#     data without any NAs, passing na_filter=False can improve the performance
#     of reading a large file.
# verbose : bool, default False
#     Indicate number of NA values placed in non-numeric columns.
# skip_blank_lines : bool, default True
#     If True, skip over blank lines rather than interpreting as NaN values.
# parse_dates : bool or list of int or names or list of lists or dict, default False
#     The behavior is as follows:
#
#     * boolean. If True -> try parsing the index.
#     * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
#       each as a separate date column.
#     * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
#       a single date column.
#     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
#       result 'foo'
#
#     If a column or index cannot be represented as an array of datetimes,
#     say because of an unparsable value or a mixture of timezones, the column
#     or index will be returned unaltered as an object data type. For
#     non-standard datetime parsing, use ``pd.to_datetime`` after
#     ``pd.read_csv``. To parse an index or column with a mixture of timezones,
#     specify ``date_parser`` to be a partially-applied
#     :func:`pandas.to_datetime` with ``utc=True``. See
#     :ref:`io.csv.mixed_timezones` for more.
#
#     Note: A fast-path exists for iso8601-formatted dates.
# infer_datetime_format : bool, default False
#     If True and `parse_dates` is enabled, pandas will attempt to infer the
#     format of the datetime strings in the columns, and if it can be inferred,
#     switch to a faster method of parsing them. In some cases this can increase
#     the parsing speed by 5-10x.
# keep_date_col : bool, default False
#     If True and `parse_dates` specifies combining multiple columns then
#     keep the original columns.
# date_parser : function, optional
#     Function to use for converting a sequence of string columns to an array of
#     datetime instances. The default uses ``dateutil.parser.parser`` to do the
#     conversion. Pandas will try to call `date_parser` in three different ways,
#     advancing to the next if an exception occurs: 1) Pass one or more arrays
#     (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
#     string values from the columns defined by `parse_dates` into a single array
#     and pass that; and 3) call `date_parser` once for each row using one or
#     more strings (corresponding to the columns defined by `parse_dates`) as
#     arguments.
# dayfirst : bool, default False
#     DD/MM format dates, international and European format.
# cache_dates : bool, default True
#     If True, use a cache of unique, converted dates to apply the datetime
#     conversion. May produce significant speed-up when parsing duplicate
#     date strings, especially ones with timezone offsets.
#
#     .. versionadded:: 0.25.0
# iterator : bool, default False
#     Return TextFileReader object for iteration or getting chunks with
#     ``get_chunk()``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# chunksize : int, optional
#     Return TextFileReader object for iteration.
#     See the `IO Tools docs
#     <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
#     for more information on ``iterator`` and ``chunksize``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# compression : str or dict, default 'infer'
#     For on-the-fly decompression of on-disk data. If 'infer' and '%s' is
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
#     'zip', the ZIP file must contain only one data file to be read in. Set to
#     ``None`` for no decompression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for Zstandard decompression using a
#     custom compression dictionary:
#     ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
#
#     .. versionchanged:: 1.4.0 Zstandard support.
#
# thousands : str, optional
#     Thousands separator.
# decimal : str, default '.'
#     Character to recognize as decimal point (e.g. use ',' for European data).
# lineterminator : str (length 1), optional
#     Character to break file into lines. Only valid with C parser.
# quotechar : str (length 1), optional
#     The character used to denote the start and end of a quoted item. Quoted
#     items can include the delimiter and it will be ignored.
# quoting : int or csv.QUOTE_* instance, default 0
#     Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
#     QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
# doublequote : bool, default ``True``
#    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
#    whether or not to interpret two consecutive quotechar elements INSIDE a
#    field as a single ``quotechar`` element.
# escapechar : str (length 1), optional
#     One-character string used to escape other characters.
# comment : str, optional
#     Indicates remainder of line should not be parsed. If found at the beginning
#     of a line, the line will be ignored altogether. This parameter must be a
#     single character. Like empty lines (as long as ``skip_blank_lines=True``),
#     fully commented lines are ignored by the parameter `header` but not by
#     `skiprows`. For example, if ``comment='#'``, parsing
#     ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
#     treated as the header.
# encoding : str, optional
#     Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
#     standard encodings
#     <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
#
#     .. versionchanged:: 1.2
#
#        When ``encoding`` is ``None``, ``errors="replace"`` is passed to
#        ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
#        This behavior was previously only the case for ``engine="python"``.
#
#     .. versionchanged:: 1.3.0
#
#        ``encoding_errors`` is a new argument. ``encoding`` no longer has any
#        influence on how encoding errors are handled.
#
# encoding_errors : str, optional, default "strict"
#     How encoding errors are treated. `List of possible values
#     <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
#
#     .. versionadded:: 1.3.0
#
# dialect : str or csv.Dialect, optional
#     If provided, this parameter will override values (default or not) for the
#     following parameters: `delimiter`, `doublequote`, `escapechar`,
#     `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
#     override values, a ParserWarning will be issued. See csv.Dialect
#     documentation for more details.
# error_bad_lines : bool, optional, default ``None``
#     Lines with too many fields (e.g. a csv line with too many commas) will by
#     default cause an exception to be raised, and no DataFrame will be returned.
#     If False, then these "bad lines" will be dropped from the DataFrame that is
#     returned.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# warn_bad_lines : bool, optional, default ``None``
#     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
#     "bad line" will be output.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# on_bad_lines : {'error', 'warn', 'skip'} or callable, default 'error'
#     Specifies what to do upon encountering a bad line (a line with too many fields).
#     Allowed values are :
#
#         - 'error', raise an Exception when a bad line is encountered.
#         - 'warn', raise a warning when a bad line is encountered and skip that line.
#         - 'skip', skip bad lines without raising or warning when they are encountered.
#
#     .. versionadded:: 1.3.0
#
#         - callable, function with signature
#           ``(bad_line: list[str]) -> list[str] | None`` that will process a single
#           bad line. ``bad_line`` is a list of strings split by the ``sep``.
#           If the function returns ``None``, the bad line will be ignored.
#           If the function returns a new list of strings with more elements than
#           expected, a ``ParserWarning`` will be emitted while dropping extra elements.
#           Only supported when ``engine="python"``
#
#     .. versionadded:: 1.4.0
#
# delim_whitespace : bool, default False
#     Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
#     used as the sep. Equivalent to setting ``sep='\s+'``. If this option
#     is set to True, nothing should be passed in for the ``delimiter``
#     parameter.
# low_memory : bool, default True
#     Internally process the file in chunks, resulting in lower memory use
#     while parsing, but possibly mixed type inference.  To ensure no mixed
#     types either set False, or specify the type with the `dtype` parameter.
#     Note that the entire file is read into a single DataFrame regardless,
#     use the `chunksize` or `iterator` parameter to return the data in chunks.
#     (Only valid with C parser).
# memory_map : bool, default False
#     If a filepath is provided for `filepath_or_buffer`, map the file object
#     directly onto memory and access the data directly from there. Using this
#     option can improve performance because there is no longer any I/O overhead.
# float_precision : str, optional
#     Specifies which converter the C engine should use for floating-point
#     values. The options are ``None`` or 'high' for the ordinary converter,
#     'legacy' for the original lower precision pandas converter, and
#     'round_trip' for the round-trip converter.
#
#     .. versionchanged:: 1.2
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2
#
# Returns
# -------
# DataFrame or TextParser
#     A comma-separated values (csv) file is returned as two-dimensional
#     data structure with labeled axes.
#
# See Also
# --------
# DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_fwf : Read a table of fixed-width formatted lines into DataFrame.
#
# Examples
# --------
# >>> pd.read_csv('data.csv')  # doctest: +SKIP
#
# </code>
# <a href='#2'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% _uuid="d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" _cell_guid="79c7e3d0-c299-4dcb-8224-4455121ee9b0"
# Load the six competition CSV files into DataFrames. All of them live in the
# same input directory, so the shared prefix is spelled out once.
_DATA_DIR = "../input/competitive-data-science-predict-future-sales/"

sales = pd.read_csv(_DATA_DIR + "sales_train.csv")
item_cat = pd.read_csv(_DATA_DIR + "item_categories.csv")
item = pd.read_csv(_DATA_DIR + "items.csv")
sub = pd.read_csv(_DATA_DIR + "sample_submission.csv")
shops = pd.read_csv(_DATA_DIR + "shops.csv")
test = pd.read_csv(_DATA_DIR + "test.csv")


# %%
# Convert the 'DD.MM.YYYY' strings in the `date` column to datetime values.
# pd.to_datetime with an explicit format parses the whole column in one
# vectorized call instead of invoking datetime.strptime once per row through
# a Python-level lambda; the resulting column holds the same timestamps.
sales["date"] = pd.to_datetime(sales["date"], format="%d.%m.%Y")


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>4. Data Preparation | Feature Engineering</h1>  <a id='4'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u></summary>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Aggregate the sales rows per (date_block_num, shop_id, item_id) group:
#   - date:         earliest and latest value in the group
#   - item_price:   mean
#   - item_cnt_day: sum
# The columns are selected with a list (double brackets). Indexing a groupby
# with a bare tuple of column names was deprecated in pandas 1.0 and raises
# an error in pandas 2.0, so the list form is required to keep this cell
# working on current pandas.
monthly_sales = (
    sales.groupby(["date_block_num", "shop_id", "item_id"])[
        ["date", "item_price", "item_cnt_day"]
    ].agg(
        {
            "date": ["min", "max"],
            "item_price": "mean",
            "item_cnt_day": "sum",
        }
    )
)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>5. Data Preparation | Feature Engineering | Visualization</h1>  <a id='5'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u></summary>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <h2 class='hglib'>matplotlib</h2>
# <ul>
# <li>
# <details><summary><u>matplotlib.pyplot.figure</u></summary>
# <blockquote>
# <code>
# Create a new figure, or activate an existing figure.
#
# Parameters
# ----------
# num : int or str or `.Figure`, optional
#     A unique identifier for the figure.
#
#     If a figure with that identifier already exists, this figure is made
#     active and returned. An integer refers to the ``Figure.number``
#     attribute, a string refers to the figure label.
#
#     If there is no figure with the identifier or *num* is not given, a new
#     figure is created, made active and returned.  If *num* is an int, it
#     will be used for the ``Figure.number`` attribute, otherwise, an
#     auto-generated integer value is used (starting at 1 and incremented
#     for each new figure). If *num* is a string, the figure label and the
#     window title is set to this value.
#
# figsize : (float, float), default: :rc:`figure.figsize`
#     Width, height in inches.
#
# dpi : float, default: :rc:`figure.dpi`
#     The resolution of the figure in dots-per-inch.
#
# facecolor : color, default: :rc:`figure.facecolor`
#     The background color.
#
# edgecolor : color, default: :rc:`figure.edgecolor`
#     The border color.
#
# frameon : bool, default: True
#     If False, suppress drawing the figure frame.
#
# FigureClass : subclass of `~matplotlib.figure.Figure`
#     Optionally use a custom `.Figure` instance.
#
# clear : bool, default: False
#     If True and the figure already exists, then it is cleared.
#
# tight_layout : bool or dict, default: :rc:`figure.autolayout`
#     If ``False`` use *subplotpars*. If ``True`` adjust subplot
#     parameters using `.tight_layout` with default padding.
#     When providing a dict containing the keys ``pad``, ``w_pad``,
#     ``h_pad``, and ``rect``, the default `.tight_layout` paddings
#     will be overridden.
#
# constrained_layout : bool, default: :rc:`figure.constrained_layout.use`
#     If ``True`` use constrained layout to adjust positioning of plot
#     elements.  Like ``tight_layout``, but designed to be more
#     flexible.  See
#     :doc:`/tutorials/intermediate/constrainedlayout_guide`
#     for examples.  (Note: does not work with `add_subplot` or
#     `~.pyplot.subplot2grid`.)
#
#
# **kwargs : optional
#     See `~.matplotlib.figure.Figure` for other possible arguments.
#
# Returns
# -------
# `~matplotlib.figure.Figure`
#     The `.Figure` instance returned will also be passed to
#     new_figure_manager in the backends, which allows to hook custom
#     `.Figure` classes into the pyplot interface. Additional kwargs will be
#     passed to the `.Figure` init function.
#
# Notes
# -----
# If you are creating many figures, make sure you explicitly call
# `.pyplot.close` on the figures you are not using, because this will
# enable pyplot to properly clean up the memory.
#
# `~matplotlib.rcParams` defines the default values, which can be modified
# in the matplotlibrc file.
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.title</u></summary>
# <blockquote>
# <code>
# Set a title for the Axes.
#
# Set one of the three available Axes titles. The available titles
# are positioned above the Axes in the center, flush with the left
# edge, and flush with the right edge.
#
# Parameters
# ----------
# label : str
#     Text to use for the title
#
# fontdict : dict
#     A dictionary controlling the appearance of the title text,
#     the default *fontdict* is::
#
#        {'fontsize': rcParams['axes.titlesize'],
#         'fontweight': rcParams['axes.titleweight'],
#         'color': rcParams['axes.titlecolor'],
#         'verticalalignment': 'baseline',
#         'horizontalalignment': loc}
#
# loc : {'center', 'left', 'right'}, default: :rc:`axes.titlelocation`
#     Which title to set.
#
# y : float, default: :rc:`axes.titley`
#     Vertical Axes location for the title (1.0 is the top).  If
#     None (the default) and :rc:`axes.titley` is also None, y is
#     determined automatically to avoid decorators on the Axes.
#
# pad : float, default: :rc:`axes.titlepad`
#     The offset of the title from the top of the Axes, in points.
#
# Returns
# -------
# `.Text`
#     The matplotlib text instance representing the title
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     Other keyword arguments are text properties, see `.Text` for a list
#     of valid text properties.
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.ylabel</u></summary>
# <blockquote>
# <code>
# Set the label for the y-axis.
#
# Parameters
# ----------
# ylabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'bottom', 'center', 'top'}, default: :rc:`yaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *y* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.xlabel</u></summary>
# <blockquote>
# <code>
# Set the label for the x-axis.
#
# Parameters
# ----------
# xlabel : str
#     The label text.
#
# labelpad : float, default: :rc:`axes.labelpad`
#     Spacing in points from the Axes bounding box including ticks
#     and tick labels.  If None, the previous value is left as is.
#
# loc : {'left', 'center', 'right'}, default: :rc:`xaxis.labellocation`
#     The label position. This is a high-level alternative for passing
#     parameters *x* and *horizontalalignment*.
#
# Other Parameters
# ----------------
# **kwargs : `.Text` properties
#     `.Text` properties control the appearance of the label.
#
# See Also
# --------
# text : Documents the properties supported by `.Text`.
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>matplotlib.pyplot.show</u></summary>
# <blockquote>
# <code>
# Display all open figures.
#
# Parameters
# ----------
# block : bool, optional
#     Whether to wait for all figures to be closed before returning.
#
#     If `True` block and run the GUI main loop until all figure windows
#     are closed.
#
#     If `False` ensure that all figure windows are displayed and return
#     immediately.  In this case, you are responsible for ensuring
#     that the event loop is running to have responsive figures.
#
#     Defaults to True in non-interactive mode and to False in interactive
#     mode (see `.pyplot.isinteractive`).
#
# See Also
# --------
# ion : Enable interactive mode, which shows / updates the figure after
#       every plotting command, so that calling ``show()`` is not necessary.
# ioff : Disable interactive mode.
# savefig : Save the figure to an image file instead of showing it on screen.
#
# Notes
# -----
# **Saving figures to file and showing a window at the same time**
#
# If you want an image file as well as a user interface window, use
# `.pyplot.savefig` before `.pyplot.show`. At the end of (a blocking)
# ``show()`` the figure is closed and thus unregistered from pyplot. Calling
# `.pyplot.savefig` afterwards would save a new and thus empty figure. This
# limitation of command order does not apply if the show is non-blocking or
# if you keep a reference to the figure and use `.Figure.savefig`.
#
# **Auto-show in jupyter notebooks**
#
# The jupyter backends (activated via ``%matplotlib inline``,
# ``%matplotlib notebook``, or ``%matplotlib widget``), call ``show()`` at
# the end of every cell by default. Thus, you usually don't have to call it
# explicitly there.
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <h2 class='hglib'>seaborn</h2>
# <ul>
# <li>
# <details><summary><u>seaborn.categorical.barplot</u></summary>
# <blockquote>
# <code>
# Show point estimates and confidence intervals as rectangular bars.
#
# A bar plot represents an estimate of central tendency for a numeric
# variable with the height of each rectangle and provides some indication of
# the uncertainty around that estimate using error bars. Bar plots include 0
# in the quantitative axis range, and they are a good choice when 0 is a
# meaningful value for the quantitative variable, and you want to make
# comparisons against it.
#
# For datasets where 0 is not a meaningful value, a point plot will allow you
# to focus on differences between levels of one or more categorical
# variables.
#
# It is also important to keep in mind that a bar plot shows only the mean
# (or other estimator) value, but in many cases it may be more informative to
# show the distribution of values at each level of the categorical variables.
# In that case, other approaches such as a box or violin plot may be more
# appropriate.
#
#
# Input data can be passed in a variety of formats, including:
#
# - Vectors of data represented as lists, numpy arrays, or pandas Series
#   objects passed directly to the ``x``, ``y``, and/or ``hue`` parameters.
# - A "long-form" DataFrame, in which case the ``x``, ``y``, and ``hue``
#   variables will determine how the data are plotted.
# - A "wide-form" DataFrame, such that each numeric column will be plotted.
# - An array or list of vectors.
#
# In most cases, it is possible to use numpy or Python objects, but pandas
# objects are preferable because the associated names will be used to
# annotate the axes. Additionally, you can use Categorical types for the
# grouping variables to control the order of plot elements.    
#
# This function always treats one of the variables as categorical and
# draws data at ordinal positions (0, 1, ... n) on the relevant axis, even
# when the data has a numeric or date type.
#
# See the :ref:`tutorial <categorical_tutorial>` for more information.    
#
# Parameters
# ----------
# x, y, hue : names of variables in ``data`` or vector data, optional
#     Inputs for plotting long-form data. See examples for interpretation.        
# data : DataFrame, array, or list of arrays, optional
#     Dataset for plotting. If ``x`` and ``y`` are absent, this is
#     interpreted as wide-form. Otherwise it is expected to be long-form.    
# order, hue_order : lists of strings, optional
#     Order to plot the categorical levels in, otherwise the levels are
#     inferred from the data objects.        
# estimator : callable that maps vector -> scalar, optional
#     Statistical function to estimate within each categorical bin.
# ci : float or "sd" or None, optional
#     Size of confidence intervals to draw around estimated values.  If
#     "sd", skip bootstrapping and draw the standard deviation of the
#     observations. If ``None``, no bootstrapping will be performed, and
#     error bars will not be drawn.
# n_boot : int, optional
#     Number of bootstrap iterations to use when computing confidence
#     intervals.
# units : name of variable in ``data`` or vector data, optional
#     Identifier of sampling units, which will be used to perform a
#     multilevel bootstrap and account for repeated measures design.
# seed : int, numpy.random.Generator, or numpy.random.RandomState, optional
#     Seed or random number generator for reproducible bootstrapping.    
# orient : "v" | "h", optional
#     Orientation of the plot (vertical or horizontal). This is usually
#     inferred based on the type of the input variables, but it can be used
#     to resolve ambiguity when both `x` and `y` are numeric or when
#     plotting wide-form data.    
# color : matplotlib color, optional
#     Color for all of the elements, or seed for a gradient palette.    
# palette : palette name, list, or dict
#     Colors to use for the different levels of the ``hue`` variable. Should
#     be something that can be interpreted by :func:`color_palette`, or a
#     dictionary mapping hue levels to matplotlib colors.    
# saturation : float, optional
#     Proportion of the original saturation to draw colors at. Large patches
#     often look better with slightly desaturated colors, but set this to
#     ``1`` if you want the plot colors to perfectly match the input color
#     spec.    
# errcolor : matplotlib color
#     Color for the lines that represent the confidence interval.
# errwidth : float, optional
#     Thickness of error bar lines (and caps).         
# capsize : float, optional
#     Width of the "caps" on error bars.
#
# dodge : bool, optional
#     When hue nesting is used, whether elements should be shifted along the
#     categorical axis.    
# ax : matplotlib Axes, optional
#     Axes object to draw the plot onto, otherwise uses the current Axes.    
# kwargs : key, value mappings
#     Other keyword arguments are passed through to
#     :meth:`matplotlib.axes.Axes.bar`.
#
# Returns
# -------
# ax : matplotlib Axes
#     Returns the Axes object with the plot drawn onto it.    
#
# See Also
# --------
# countplot : Show the counts of observations in each categorical bin.    
# pointplot : Show point estimates and confidence intervals using scatterplot
#             glyphs.    
# catplot : Combine a categorical plot with a :class:`FacetGrid`.    
#
# Examples
# --------
#
# Draw a set of vertical bar plots grouped by a categorical variable:
#
# .. plot::
#     :context: close-figs
#
#     >>> import seaborn as sns
#     >>> sns.set_theme(style="whitegrid")
#     >>> tips = sns.load_dataset("tips")
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips)
#
# Draw a set of vertical bars with nested grouping by two variables:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="sex", data=tips)
#
# Draw a set of horizontal bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="tip", y="day", data=tips)
#
# Control bar order by passing an explicit order:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="time", y="tip", data=tips,
#     ...                  order=["Dinner", "Lunch"])
#
# Use median as the estimate of central tendency:
#
# .. plot::
#     :context: close-figs
#
#     >>> from numpy import median
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, estimator=median)
#
# Show the standard error of the mean with the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci=68)
#
# Show standard deviation of observations instead of a confidence interval:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, ci="sd")
#
# Add "caps" to the error bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="tip", data=tips, capsize=.2)
#
# Use a different color palette for the bars:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  palette="Blues_d")
#
# Use ``hue`` without changing bar position or width:
#
# .. plot::
#     :context: close-figs
#
#     >>> tips["weekend"] = tips["day"].isin(["Sat", "Sun"])
#     >>> ax = sns.barplot(x="day", y="total_bill", hue="weekend",
#     ...                  data=tips, dodge=False)
#
# Plot all bars in a single color:
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="size", y="total_bill", data=tips,
#     ...                  color="salmon", saturation=.5)
#
# Use :meth:`matplotlib.axes.Axes.bar` parameters to control the style.
#
# .. plot::
#     :context: close-figs
#
#     >>> ax = sns.barplot(x="day", y="total_bill", data=tips,
#     ...                  linewidth=2.5, facecolor=(1, 1, 1, 0),
#     ...                  errcolor=".2", edgecolor=".2")
#
# Use :func:`catplot` to combine a :func:`barplot` and a :class:`FacetGrid`.
# This allows grouping within additional categorical variables. Using
# :func:`catplot` is safer than using :class:`FacetGrid` directly, as it
# ensures synchronization of variable order across facets:
#
# .. plot::
#     :context: close-figs
#
#     >>> g = sns.catplot(x="sex", y="total_bill",
#     ...                 hue="smoker", col="time",
#     ...                 data=tips, kind="bar",
#     ...                 height=4, aspect=.7);
#
# </code>
# <a href='#5'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Count rows per item category. count() returns the number of non-null
# values in each remaining column, so after the groupby the `item_id`
# column holds the per-category item count.
x = item.groupby(['item_category_id']).count()
# Keep the ten categories with the highest item count.
x = x.sort_values(by='item_id', ascending=False)
x = x.iloc[0:10].reset_index()

# Bar chart: one bar per category, bar height = number of items.
plt.figure(figsize=(8, 4))
# x/y are passed by keyword: positional x/y were deprecated in seaborn 0.11
# and raise a TypeError from seaborn 0.12 on, where only `data` may be
# given positionally.
ax = sns.barplot(x=x.item_category_id, y=x.item_id, alpha=0.8)
plt.title("Items per Category")
plt.ylabel('# of items', fontsize=12)
plt.xlabel('Category', fontsize=12)
plt.show()


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>6. Data Preparation</h1>  <a id='6'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.head</u></summary>
# <blockquote>
# <code>
# Return the first `n` rows.
#
# This function returns the first `n` rows for the object based
# on position. It is useful for quickly testing if your object
# has the right type of data in it.
#
# For negative values of `n`, this function returns all rows except
# the last `n` rows, equivalent to ``df[:-n]``.
#
# Parameters
# ----------
# n : int, default 5
#     Number of rows to select.
#
# Returns
# -------
# same type as caller
#     The first `n` rows of the caller object.
#
# See Also
# --------
# DataFrame.tail: Returns the last `n` rows.
#
# Examples
# --------
# >>> df = pd.DataFrame({'animal': ['alligator', 'bee', 'falcon', 'lion',
# ...                    'monkey', 'parrot', 'shark', 'whale', 'zebra']})
# >>> df
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
# 6      shark
# 7      whale
# 8      zebra
#
# Viewing the first 5 lines
#
# >>> df.head()
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
#
# Viewing the first `n` lines (three in this case)
#
# >>> df.head(3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
#
# For negative values of `n`
#
# >>> df.head(-3)
#       animal
# 0  alligator
# 1        bee
# 2     falcon
# 3       lion
# 4     monkey
# 5     parrot
#
# </code>
# <a href='#6'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Preview the first five rows of `sales` (5 is head()'s default).
sales.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>7. Data Preparation | Feature Engineering</h1>  <a id='7'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.groupby</u></summary>
# <blockquote>
# <code>
# Group DataFrame using a mapper or by a Series of columns.
#
# A groupby operation involves some combination of splitting the
# object, applying a function, and combining the results. This can be
# used to group large amounts of data and compute operations on these
# groups.
#
# Parameters
# ----------
# by : mapping, function, label, or list of labels
#     Used to determine the groups for the groupby.
#     If ``by`` is a function, it's called on each value of the object's
#     index. If a dict or Series is passed, the Series or dict VALUES
#     will be used to determine the groups (the Series' values are first
#     aligned; see ``.align()`` method). If a list or ndarray of length
#     equal to the selected axis is passed (see the `groupby user guide
#     <https://pandas.pydata.org/pandas-docs/stable/user_guide/groupby.html#splitting-an-object-into-groups>`_),
#     the values are used as-is to determine the groups. A label or list
#     of labels may be passed to group by the columns in ``self``.
#     Notice that a tuple is interpreted as a (single) key.
# axis : {0 or 'index', 1 or 'columns'}, default 0
#     Split along rows (0) or columns (1).
# level : int, level name, or sequence of such, default None
#     If the axis is a MultiIndex (hierarchical), group by a particular
#     level or levels.
# as_index : bool, default True
#     For aggregated output, return object with group labels as the
#     index. Only relevant for DataFrame input. as_index=False is
#     effectively "SQL-style" grouped output.
# sort : bool, default True
#     Sort group keys. Get better performance by turning this off.
#     Note this does not influence the order of observations within each
#     group. Groupby preserves the order of rows within each group.
# group_keys : bool, default True
#     When calling apply, add group keys to index to identify pieces.
# squeeze : bool, default False
#     Reduce the dimensionality of the return type if possible,
#     otherwise return a consistent type.
#
#     .. deprecated:: 1.1.0
#
# observed : bool, default False
#     This only applies if any of the groupers are Categoricals.
#     If True: only show observed values for categorical groupers.
#     If False: show all values for categorical groupers.
# dropna : bool, default True
#     If True, and if group keys contain NA values, NA values together
#     with row/column will be dropped.
#     If False, NA values will also be treated as the key in groups.
#
#     .. versionadded:: 1.1.0
#
# Returns
# -------
# DataFrameGroupBy
#     Returns a groupby object that contains information about the groups.
#
# See Also
# --------
# resample : Convenience method for frequency conversion and resampling
#     of time series.
#
# Notes
# -----
# See the `user guide
# <https://pandas.pydata.org/pandas-docs/stable/groupby.html>`__ for more
# detailed usage and examples, including splitting an object into groups,
# iterating through groups, selecting a group, aggregation, and more.
#
# Examples
# --------
# >>> df = pd.DataFrame({'Animal': ['Falcon', 'Falcon',
# ...                               'Parrot', 'Parrot'],
# ...                    'Max Speed': [380., 370., 24., 26.]})
# >>> df
#    Animal  Max Speed
# 0  Falcon      380.0
# 1  Falcon      370.0
# 2  Parrot       24.0
# 3  Parrot       26.0
# >>> df.groupby(['Animal']).mean()
#         Max Speed
# Animal
# Falcon      375.0
# Parrot       25.0
#
# **Hierarchical Indexes**
#
# We can groupby different levels of a hierarchical index
# using the `level` parameter:
#
# >>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
# ...           ['Captive', 'Wild', 'Captive', 'Wild']]
# >>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
# >>> df = pd.DataFrame({'Max Speed': [390., 350., 30., 20.]},
# ...                   index=index)
# >>> df
#                 Max Speed
# Animal Type
# Falcon Captive      390.0
#        Wild         350.0
# Parrot Captive       30.0
#        Wild          20.0
# >>> df.groupby(level=0).mean()
#         Max Speed
# Animal
# Falcon      370.0
# Parrot       25.0
# >>> df.groupby(level="Type").mean()
#          Max Speed
# Type
# Captive      210.0
# Wild         185.0
#
# We can also choose to include NA in group keys or not by setting
# `dropna` parameter, the default setting is `True`.
#
# >>> l = [[1, 2, 3], [1, None, 4], [2, 1, 3], [1, 2, 2]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by=["b"]).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
#
# >>> df.groupby(by=["b"], dropna=False).sum()
#     a   c
# b
# 1.0 2   3
# 2.0 2   5
# NaN 1   4
#
# >>> l = [["a", 12, 12], [None, 12.3, 33.], ["b", 12.3, 123], ["a", 1, 1]]
# >>> df = pd.DataFrame(l, columns=["a", "b", "c"])
#
# >>> df.groupby(by="a").sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
#
# >>> df.groupby(by="a", dropna=False).sum()
#     b     c
# a
# a   13.0   13.0
# b   12.3  123.0
# NaN 12.3   33.0
#
# </code>
# <a href='#7'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Sum item_cnt_day over all rows sharing the same `date`, then turn the
# resulting date-indexed Series back into a flat frame with the columns
# `date` and `item_cnt_day`.
sales_by_date = sales.groupby('date')['item_cnt_day'].sum()
df_sales = sales_by_date.reset_index()
df_sales.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>8. Library Loading | Visualization</h1>  <a id='8'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>plotly</h2>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u></summary>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#8'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u></summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#8'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u></summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner of
#                             the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#8'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
import plotly.offline as pyoff
import plotly.graph_objs as go
# Single scatter trace: aggregated sales (y) against date (x).
sales_trace = go.Scatter(x=df_sales['date'], y=df_sales['item_cnt_day'])
plot_data = [sales_trace]
plot_layout = go.Layout(title=' Sales')

# Assemble the figure and render it inline in the notebook.
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

# %%
# Work on a copy so df_sales itself is left untouched.
df_diff = df_sales.copy()
# prev_sales is the item_cnt_day value of the preceding row; the first row
# has no predecessor and therefore gets NaN.
df_diff['prev_sales'] = df_diff['item_cnt_day'].shift(periods=1)
# Drop every row containing a NaN (at least the first row, see above).
df_diff = df_diff.dropna()
# Row-over-row change in sales.
df_diff['diff'] = df_diff['item_cnt_day'].sub(df_diff['prev_sales'])
df_diff.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>10. Visualization</h1>  <a id='10'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>plotly</h2>
# <ul>
# <li>
# <details><summary><u>plotly.graph_objs._layout.Layout</u></summary>
# <blockquote>
# <code>
# Base class for the layout type. The Layout class itself is a
# code-generated subclass.
#
# </code>
# <a href='#10'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.graph_objs._figure.Figure</u></summary>
# <blockquote>
# <code>
# Base class for all figure types (both widget and non-widget)
#
# </code>
# <a href='#10'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>plotly.offline.offline.iplot</u></summary>
# <blockquote>
# <code>
# Draw plotly graphs inside an IPython or Jupyter notebook
#
# figure_or_data -- a plotly.graph_objs.Figure or plotly.graph_objs.Data or
#                   dict or list that describes a Plotly graph.
#                   See https://plot.ly/python/ for examples of
#                   graph descriptions.
#
# Keyword arguments:
# show_link (default=False) -- display a link in the bottom-right corner of
#                             the chart that will export the chart to
#                             Plotly Cloud or Plotly Enterprise
# link_text (default='Export to plot.ly') -- the text of export link
# validate (default=True) -- validate that all of the keys in the figure
#                            are valid? omit if your version of plotly.js
#                            has become outdated with your version of
#                            graph_reference.json or if you need to include
#                            extra, unnecessary keys in your figure.
# image (default=None |'png' |'jpeg' |'svg' |'webp') -- This parameter sets
#     the format of the image to be downloaded, if we choose to download an
#     image. This parameter has a default value of None indicating that no
#     image should be downloaded. Please note: for higher resolution images
#     and more export options, consider using plotly.io.write_image. See
#     https://plot.ly/python/static-image-export/ for more details.
# filename (default='plot') -- Sets the name of the file your image
#     will be saved to. The extension should not be included.
# image_height (default=600) -- Specifies the height of the image in `px`.
# image_width (default=800) -- Specifies the width of the image in `px`.
# config (default=None) -- Plot view options dictionary. Keyword arguments
#     `show_link` and `link_text` set the associated options in this
#     dictionary if it doesn't contain them already.
# auto_play (default=True) -- Whether to automatically start the animation
#     sequence on page load, if the figure contains frames. Has no effect if
#     the figure does not contain frames.
# animation_opts (default=None) -- Dict of custom animation parameters that
#     are used for the automatically started animation on page load. This
#     dict is passed to the function Plotly.animate in Plotly.js. See
#     https://github.com/plotly/plotly.js/blob/master/src/plots/animation_attributes.js
#     for available options. Has no effect if the figure
#     does not contain frames, or auto_play is False.
#
# Example:
# ```
# from plotly.offline import init_notebook_mode, iplot
# init_notebook_mode()
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}])
# We can also download an image of the plot by setting the image to the
# format you want. e.g. `image='png'`
# iplot([{'x': [1, 2, 3], 'y': [5, 2, 7]}], image='png')
# ```
#
# animation_opts Example:
# ```
# from plotly.offline import iplot
# figure = {'data': [{'x': [0, 1], 'y': [0, 1]}],
#           'layout': {'xaxis': {'range': [0, 5], 'autorange': False},
#                      'yaxis': {'range': [0, 5], 'autorange': False},
#                      'title': 'Start Title'},
#           'frames': [{'data': [{'x': [1, 2], 'y': [1, 2]}]},
#                      {'data': [{'x': [1, 4], 'y': [1, 4]}]},
#                      {'data': [{'x': [3, 4], 'y': [3, 4]}],
#                       'layout': {'title': 'End Title'}}]}
# iplot(figure, animation_opts={'frame': {'duration': 1}})
# ```
#
# </code>
# <a href='#10'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Single scatter trace: row-over-row sales difference (y) against date (x).
plot_data = [
    go.Scatter(
        x=df_diff['date'],
        y=df_diff['diff'],
    )
]
# Title typo fixed: the original read 'Montly Sales Diff'.
plot_layout = go.Layout(
        title='Monthly Sales Diff'
    )
# Assemble the figure and render it inline in the notebook.
fig = go.Figure(data=plot_data, layout=plot_layout)
pyoff.iplot(fig)

# %%
# Start from df_diff without the helper column prev_sales.
df_supervised = df_diff.drop(columns=['prev_sales'])
# Add lag_1 .. lag_12: lag_k is the `diff` value from k rows earlier.
for inc in range(1, 13):
    field_name = f'lag_{inc}'
    df_supervised[field_name] = df_supervised['diff'].shift(periods=inc)
# Shifting leaves NaNs in the leading rows; drop those rows and renumber
# the index from 0 without keeping the old index as a column.
df_supervised = df_supervised.dropna()
df_supervised = df_supervised.reset_index(drop=True)

# %%
# Preview the first five rows of the lagged frame (5 is head()'s default).
df_supervised.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>13. Library Loading | Model Building and Training</h1>  <a id='13'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>statsmodels</h2>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u></summary>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional argument that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#13'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
import statsmodels.formula.api as smf
# Ordinary least squares of `diff` on its first lag only.
single_lag_formula = 'diff ~ lag_1'
model = smf.ols(formula=single_lag_formula, data=df_supervised)
model_fit = model.fit()

# Report the adjusted R-squared of the fitted regression.
regression_adj_rsq = model_fit.rsquared_adj
print(regression_adj_rsq)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>14. Library Loading | Model Building and Training</h1>  <a id='14'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>statsmodels</h2>
# <ul>
# <li>
# <details><summary><u>statsmodels.base.model.Model.from_formula</u></summary>
# <blockquote>
# <code>
# Create a Model from a formula and dataframe.
#
# Parameters
# ----------
# formula : str or generic Formula object
#     The formula specifying the model.
# data : array_like
#     The data for the model. See Notes.
# subset : array_like
#     An array-like object of booleans, integers, or index values that
#     indicate the subset of df to use in the model. Assumes df is a
#     `pandas.DataFrame`.
# drop_cols : array_like
#     Columns to drop from the design matrix.  Cannot be used to
#     drop terms involving categoricals.
# *args
#     Additional positional argument that are passed to the model.
# **kwargs
#     These are passed to the model with one exception. The
#     ``eval_env`` keyword is passed to patsy. It can be either a
#     :class:`patsy:patsy.EvalEnvironment` object or an integer
#     indicating the depth of the namespace to use. For example, the
#     default ``eval_env=0`` uses the calling namespace. If you wish
#     to use a "clean" environment set ``eval_env=-1``.
#
# Returns
# -------
# model
#     The model instance.
#
# Notes
# -----
# data must define __getitem__ with the keys in the formula terms, e.g.,
# a numpy structured or rec array, a dictionary, or a pandas DataFrame.
# args and kwargs are passed on to the model instantiation.
#
# </code>
# <a href='#14'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
import statsmodels.formula.api as smf
# Ordinary least squares of `diff` on all twelve lag columns. The formula
# is split over two adjacent literals purely for line length; the
# concatenated string is unchanged.
all_lags_formula = (
    'diff ~ lag_1+lag_2+lag_3+lag_4+lag_5+lag_6'
    '+lag_7+lag_8+lag_9+lag_10+lag_11+lag_12'
)
model = smf.ols(formula=all_lags_formula, data=df_supervised)
model_fit = model.fit()

# Report the adjusted R-squared of the fitted regression.
regression_adj_rsq = model_fit.rsquared_adj
print(regression_adj_rsq)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>15. Library Loading</h1>  <a id='15'></a><small><a href='#top_phases'>back to top</a></small>

# %%
from sklearn.preprocessing import MinMaxScaler
# Remove the 'item_cnt_day' and 'date' columns, then split by position:
# the final six rows become the test array, all earlier rows the training array.
df_model = df_supervised.drop(columns=['item_cnt_day', 'date'])
train_set = df_model.iloc[:-6].values
test_set = df_model.iloc[-6:].values

# %%
# Bare expression: shows the held-out test array as the cell's output.
test_set


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>17. Data Preparation</h1>  <a id='17'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>sklearn</h2>
# <ul>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler</u></summary>
# <blockquote>
# <code>
# Transform features by scaling each feature to a given range.
#
# This estimator scales and translates each feature individually such
# that it is in the given range on the training set, e.g. between
# zero and one.
#
# The transformation is given by::
#
#     X_std = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))
#     X_scaled = X_std * (max - min) + min
#
# where min, max = feature_range.
#
# This transformation is often used as an alternative to zero mean,
# unit variance scaling.
#
# Read more in the :ref:`User Guide <preprocessing_scaler>`.
#
# Parameters
# ----------
# feature_range : tuple (min, max), default=(0, 1)
#     Desired range of transformed data.
#
# copy : bool, default=True
#     Set to False to perform inplace row normalization and avoid a
#     copy (if the input is already a numpy array).
#
# clip : bool, default=False
#     Set to True to clip transformed values of held-out data to
#     provided `feature range`.
#
#     .. versionadded:: 0.24
#
# Attributes
# ----------
# min_ : ndarray of shape (n_features,)
#     Per feature adjustment for minimum. Equivalent to
#     ``min - X.min(axis=0) * self.scale_``
#
# scale_ : ndarray of shape (n_features,)
#     Per feature relative scaling of the data. Equivalent to
#     ``(max - min) / (X.max(axis=0) - X.min(axis=0))``
#
#     .. versionadded:: 0.17
#        *scale_* attribute.
#
# data_min_ : ndarray of shape (n_features,)
#     Per feature minimum seen in the data
#
#     .. versionadded:: 0.17
#        *data_min_*
#
# data_max_ : ndarray of shape (n_features,)
#     Per feature maximum seen in the data
#
#     .. versionadded:: 0.17
#        *data_max_*
#
# data_range_ : ndarray of shape (n_features,)
#     Per feature range ``(data_max_ - data_min_)`` seen in the data
#
#     .. versionadded:: 0.17
#        *data_range_*
#
# n_features_in_ : int
#     Number of features seen during :term:`fit`.
#
#     .. versionadded:: 0.24
#
# n_samples_seen_ : int
#     The number of samples processed by the estimator.
#     It will be reset on new calls to fit, but increments across
#     ``partial_fit`` calls.
#
# feature_names_in_ : ndarray of shape (`n_features_in_`,)
#     Names of features seen during :term:`fit`. Defined only when `X`
#     has feature names that are all strings.
#
#     .. versionadded:: 1.0
#
# See Also
# --------
# minmax_scale : Equivalent function without the estimator API.
#
# Notes
# -----
# NaNs are treated as missing values: disregarded in fit, and maintained in
# transform.
#
# For a comparison of the different scalers, transformers, and normalizers,
# see :ref:`examples/preprocessing/plot_all_scaling.py
# <sphx_glr_auto_examples_preprocessing_plot_all_scaling.py>`.
#
# Examples
# --------
# >>> from sklearn.preprocessing import MinMaxScaler
# >>> data = [[-1, 2], [-0.5, 6], [0, 10], [1, 18]]
# >>> scaler = MinMaxScaler()
# >>> print(scaler.fit(data))
# MinMaxScaler()
# >>> print(scaler.data_max_)
# [ 1. 18.]
# >>> print(scaler.transform(data))
# [[0.   0.  ]
#  [0.25 0.25]
#  [0.5  0.5 ]
#  [1.   1.  ]]
# >>> print(scaler.transform([[2, 2]]))
# [[1.5 0. ]]
#
# </code>
# <a href='#17'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>sklearn.preprocessing._data.MinMaxScaler.fit</u></summary>
# <blockquote>
# <code>
# Compute the minimum and maximum to be used for later scaling.
#
# Parameters
# ----------
# X : array-like of shape (n_samples, n_features)
#     The data used to compute the per-feature minimum and maximum
#     used for later scaling along the features axis.
#
# y : None
#     Ignored.
#
# Returns
# -------
# self : object
#     Fitted scaler.
#
# </code>
# <a href='#17'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Fit the scaler on the training rows only, targeting the range [-1, 1].
# fit() returns the fitted scaler itself, so construction and fit are chained.
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(train_set)

# Each array is reshaped to (rows, columns) and then scaled; the test rows are
# transformed with the min/max learned from the training rows.
train_set = train_set.reshape(*train_set.shape[:2])
train_set_scaled = scaler.transform(train_set)

test_set = test_set.reshape(*test_set.shape[:2])
test_set_scaled = scaler.transform(test_set)

# %%
# Column 0 is kept (as a 2-D slice) for the y arrays; the remaining columns
# form the X arrays. Indexing with None inserts a singleton middle axis, so
# X_* end up 3-D with shape (rows, 1, columns).
y_train = train_set_scaled[:, :1]
X_train = train_set_scaled[:, 1:][:, None, :]
y_test = test_set_scaled[:, :1]
X_test = test_set_scaled[:, 1:][:, None, :]


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>19. Library Loading | Model Building and Training</h1>  <a id='19'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>keras</h2>
# <ul>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u></summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# Note that you can also omit the `input_shape` argument.
# In that case the model doesn't have any weights until the first call
# to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.weights not created yet
#
# Whereas if you specify the input shape, the model gets built
# continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# Returns "4"
#
# When using the delayed-build pattern (no input shape specified), you can
# choose to manually build your model by calling
# `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# Returns "4"
#
# Note that when using the delayed-build pattern (no input shape specified),
# the model gets built the first time you call `fit`, `eval`, or `predict`,
# or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential.add</u></summary>
# <blockquote>
# <code>
# Adds a layer instance on top of the layer stack.
#
# Args:
#     layer: layer instance.
#
# Raises:
#     TypeError: If `layer` is not a layer instance.
#     ValueError: In case the `layer` argument does not
#         know its input shape.
#     ValueError: In case the `layer` argument has
#         multiple output tensors, or is already connected
#         somewhere else (forbidden in `Sequential` models).
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.recurrent_v2.LSTM</u></summary>
# <blockquote>
# <code>
# Long Short-Term Memory layer - Hochreiter 1997.
#
# See [the Keras RNN API guide](https://www.tensorflow.org/guide/keras/rnn)
# for details about the usage of RNN API.
#
# Based on available runtime hardware and constraints, this layer
# will choose different implementations (cuDNN-based or pure-TensorFlow)
# to maximize the performance. If a GPU is available and all
# the arguments to the layer meet the requirement of the cuDNN kernel
# (see below for details), the layer will use a fast cuDNN implementation.
#
# The requirements to use the cuDNN implementation are:
#
# 1. `activation` == `tanh`
# 2. `recurrent_activation` == `sigmoid`
# 3. `recurrent_dropout` == 0
# 4. `unroll` is `False`
# 5. `use_bias` is `True`
# 6. Inputs, if use masking, are strictly right-padded.
# 7. Eager execution is enabled in the outermost context.
#
# For example:
#
# >>> inputs = tf.random.normal([32, 10, 8])
# >>> lstm = tf.keras.layers.LSTM(4)
# >>> output = lstm(inputs)
# >>> print(output.shape)
# (32, 4)
# >>> lstm = tf.keras.layers.LSTM(4, return_sequences=True, return_state=True)
# >>> whole_seq_output, final_memory_state, final_carry_state = lstm(inputs)
# >>> print(whole_seq_output.shape)
# (32, 10, 4)
# >>> print(final_memory_state.shape)
# (32, 4)
# >>> print(final_carry_state.shape)
# (32, 4)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     Default: hyperbolic tangent (`tanh`). If you pass `None`, no activation
#     is applied (ie. "linear" activation: `a(x) = x`).
#   recurrent_activation: Activation function to use for the recurrent step.
#     Default: sigmoid (`sigmoid`). If you pass `None`, no activation is
#     applied (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean (default `True`), whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix, used for
#     the linear transformation of the inputs. Default: `glorot_uniform`.
#   recurrent_initializer: Initializer for the `recurrent_kernel` weights
#     matrix, used for the linear transformation of the recurrent state.
#     Default: `orthogonal`.
#   bias_initializer: Initializer for the bias vector. Default: `zeros`.
#   unit_forget_bias: Boolean (default `True`). If True, add 1 to the bias of
#     the forget gate at initialization. Setting it to true will also force
#     `bias_initializer="zeros"`. This is recommended in [Jozefowicz et
#         al.](http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
#   kernel_regularizer: Regularizer function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_regularizer: Regularizer function applied to the
#     `recurrent_kernel` weights matrix. Default: `None`.
#   bias_regularizer: Regularizer function applied to the bias vector. Default:
#     `None`.
#   activity_regularizer: Regularizer function applied to the output of the
#     layer (its "activation"). Default: `None`.
#   kernel_constraint: Constraint function applied to the `kernel` weights
#     matrix. Default: `None`.
#   recurrent_constraint: Constraint function applied to the `recurrent_kernel`
#     weights matrix. Default: `None`.
#   bias_constraint: Constraint function applied to the bias vector. Default:
#     `None`.
#   dropout: Float between 0 and 1. Fraction of the units to drop for the linear
#     transformation of the inputs. Default: 0.
#   recurrent_dropout: Float between 0 and 1. Fraction of the units to drop for
#     the linear transformation of the recurrent state. Default: 0.
#   return_sequences: Boolean. Whether to return the last output in the output
#     sequence, or the full sequence. Default: `False`.
#   return_state: Boolean. Whether to return the last state in addition to the
#     output. Default: `False`.
#   go_backwards: Boolean (default `False`). If True, process the input sequence
#     backwards and return the reversed sequence.
#   stateful: Boolean (default `False`). If True, the last state for each sample
#     at index i in a batch will be used as initial state for the sample of
#     index i in the following batch.
#   time_major: The shape format of the `inputs` and `outputs` tensors.
#     If True, the inputs and outputs will be in shape
#     `[timesteps, batch, feature]`, whereas in the False case, it will be
#     `[batch, timesteps, feature]`. Using `time_major = True` is a bit more
#     efficient because it avoids transposes at the beginning and end of the
#     RNN calculation. However, most TensorFlow data is batch-major, so by
#     default this function accepts input and emits output in batch-major
#     form.
#   unroll: Boolean (default `False`). If True, the network will be unrolled,
#     else a symbolic loop will be used. Unrolling can speed-up a RNN, although
#     it tends to be more memory-intensive. Unrolling is only suitable for short
#     sequences.
#
# Call arguments:
#   inputs: A 3D tensor with shape `[batch, timesteps, feature]`.
#   mask: Binary tensor of shape `[batch, timesteps]` indicating whether
#     a given timestep should be masked (optional, defaults to `None`).
#     An individual `True` entry indicates that the corresponding timestep
#     should be utilized, while a `False` entry indicates that the corresponding
#     timestep should be ignored.
#   training: Python boolean indicating whether the layer should behave in
#     training mode or in inference mode. This argument is passed to the cell
#     when calling it. This is only relevant if `dropout` or
#     `recurrent_dropout` is used (optional, defaults to `None`).
#   initial_state: List of initial state tensors to be passed to the first
#     call of the cell (optional, defaults to `None` which causes creation
#     of zero-filled initial state tensors).
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u></summary>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u></summary>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of this can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u></summary>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#        `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#        `keras.utils.Sequence` instance, instead provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is  wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#19'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
import keras
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam 
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from keras.layers import LSTM
from sklearn.model_selection import KFold, cross_val_score, train_test_split
# Build a small stateful LSTM regressor: one LSTM layer with 4 units feeding a
# single Dense output unit.
# batch_input_shape pins the batch dimension to 1, which a stateful layer
# requires; the remaining two dimensions are taken from X_train's axes 1 and 2.
model = Sequential()
model.add(LSTM(4, batch_input_shape=(1, X_train.shape[1], X_train.shape[2]), stateful=True))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
# `epochs` replaces the Keras 1 keyword `nb_epoch`, which Model.fit no longer
# accepts. batch_size=1 matches the fixed batch dimension above, and
# shuffle=False keeps the samples in their original order during training.
model.fit(X_train, y_train, epochs=50, batch_size=1, verbose=1, shuffle=False)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>20. Model Building and Training</h1>  <a id='20'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>keras</h2>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u></summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of dataset, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#20'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Run the trained model over X_test. batch_size=1 matches the batch dimension
# fixed by batch_input_shape=(1, ...) when the stateful LSTM was built.
y_pred = model.predict(X_test,batch_size=1)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>21. Data Preparation | Feature Engineering | Library Loading</h1>  <a id='21'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>numpy</h2>
# <ul>
# <li>
# <details><summary><u>numpy.ndarray.reshape</u></summary>
# <blockquote>
# <code>
# a.reshape(shape, order='C')
#
# Returns an array containing the same data with a new shape.
#
# Refer to `numpy.reshape` for full documentation.
#
# See Also
# --------
# numpy.reshape : equivalent function
#
# Notes
# -----
# Unlike the free function `numpy.reshape`, this method on `ndarray` allows
# the elements of the shape parameter to be passed in as separate arguments.
# For example, ``a.reshape(10, 11)`` is equivalent to
# ``a.reshape((10, 11))``.
#
# </code>
# <a href='#21'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.array</u></summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#21'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
import numpy as np
# Insert a singleton middle axis so that y_pred[index] is a 2-D row which can
# be joined with X_test[index] along axis 1.
y_pred = y_pred.reshape(y_pred.shape[0], 1, y_pred.shape[1])

# For every test sample, place the prediction in front of that sample's input
# features (prediction ends up in column 0), echoing each combined row.
pred_test_set = []
for index in range(len(y_pred)):
    combined_row = np.concatenate([y_pred[index], X_test[index]], axis=1)
    print(combined_row)
    pred_test_set.append(combined_row)

# Stack the rows into one array and drop the singleton middle axis, leaving a
# 2-D (samples, columns) matrix.
pred_test_set = np.array(pred_test_set)
pred_test_set = pred_test_set.reshape(
    pred_test_set.shape[0],
    pred_test_set.shape[2],
)

# `scaler` comes from an earlier cell; presumably it is the scaler that was
# fitted on the same column layout -- TODO confirm.
pred_test_set_inverted = scaler.inverse_transform(pred_test_set)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>22. Data Preparation</h1>  <a id='22'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame</u></summary>
# <blockquote>
# <code>
# Two-dimensional, size-mutable, potentially heterogeneous tabular data.
#
# Data structure also contains labeled axes (rows and columns).
# Arithmetic operations align on both row and column labels. Can be
# thought of as a dict-like container for Series objects. The primary
# pandas data structure.
#
# Parameters
# ----------
# data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
#     Dict can contain Series, arrays, constants, dataclass or list-like objects. If
#     data is a dict, column order follows insertion-order. If a dict contains Series
#     which have an index defined, it is aligned by its index.
#
#     .. versionchanged:: 0.25.0
#        If data is a list of dicts, column order follows insertion-order.
#
# index : Index or array-like
#     Index to use for resulting frame. Will default to RangeIndex if
#     no indexing information part of input data and no index provided.
# columns : Index or array-like
#     Column labels to use for resulting frame when data does not have them,
#     defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
#     will perform column selection instead.
# dtype : dtype, default None
#     Data type to force. Only a single dtype is allowed. If None, infer.
# copy : bool or None, default None
#     Copy data from inputs.
#     For dict data, the default of None behaves like ``copy=True``.  For DataFrame
#     or 2d ndarray input, the default of None behaves like ``copy=False``.
#
#     .. versionchanged:: 1.3.0
#
# See Also
# --------
# DataFrame.from_records : Constructor from tuples, also record arrays.
# DataFrame.from_dict : From dicts of Series, arrays, or dicts.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_table : Read general delimited file into DataFrame.
# read_clipboard : Read text from clipboard into DataFrame.
#
# Examples
# --------
# Constructing DataFrame from a dictionary.
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df
#    col1  col2
# 0     1     3
# 1     2     4
#
# Notice that the inferred dtype is int64.
#
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# To enforce a single dtype:
#
# >>> df = pd.DataFrame(data=d, dtype=np.int8)
# >>> df.dtypes
# col1    int8
# col2    int8
# dtype: object
#
# Constructing DataFrame from a dictionary including Series:
#
# >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
# >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
#    col1  col2
# 0     0   NaN
# 1     1   NaN
# 2     2   2.0
# 3     3   3.0
#
# Constructing DataFrame from numpy ndarray:
#
# >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
# ...                    columns=['a', 'b', 'c'])
# >>> df2
#    a  b  c
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
#
# Constructing DataFrame from a numpy ndarray that has labeled columns:
#
# >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
# ...                 dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")])
# >>> df3 = pd.DataFrame(data, columns=['c', 'a'])
# ...
# >>> df3
#    c  a
# 0  3  1
# 1  6  4
# 2  9  7
#
# Constructing DataFrame from dataclass:
#
# >>> from dataclasses import make_dataclass
# >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
# >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
#    x  y
# 0  0  0
# 1  0  3
# 2  2  3
#
# </code>
# <a href='#22'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Turn the inverse-scaled predictions into a table of (pred_value, date) rows.
result_list = []

# Only the last seven rows of `sales` are used as the reference window.
last_rows = sales[-7:]
sales_dates = list(last_rows.date)
act_sales = list(last_rows.item_cnt_day)

for index, inverted_row in enumerate(pred_test_set_inverted):
    # Column 0 of each inverted row is added to the actual sales at the same
    # position, and the result is labelled with the *next* date in the window.
    # NOTE(review): this looks like the model output is a day-over-day
    # difference -- confirm against the feature-engineering cells.
    result_dict = {
        'pred_value': int(inverted_row[0] + act_sales[index]),
        'date': sales_dates[index + 1],
    }
    result_list.append(result_dict)

df_result = pd.DataFrame(result_list)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>23. Data Preparation</h1>  <a id='23'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.to_csv</u></summary>
# <blockquote>
# <code>
# Write object to a comma-separated values (csv) file.
#
# Parameters
# ----------
# path_or_buf : str, path object, file-like object, or None, default None
#     String, path object (implementing os.PathLike[str]), or file-like
#     object implementing a write() function. If None, the result is
#     returned as a string. If a non-binary file object is passed, it should
#     be opened with `newline=''`, disabling universal newlines. If a binary
#     file object is passed, `mode` might need to contain a `'b'`.
#
#     .. versionchanged:: 1.2.0
#
#        Support for binary file objects was introduced.
#
# sep : str, default ','
#     String of length 1. Field delimiter for the output file.
# na_rep : str, default ''
#     Missing data representation.
# float_format : str, default None
#     Format string for floating point numbers.
# columns : sequence, optional
#     Columns to write.
# header : bool or list of str, default True
#     Write out the column names. If a list of strings is given it is
#     assumed to be aliases for the column names.
# index : bool, default True
#     Write row names (index).
# index_label : str or sequence, or False, default None
#     Column label for index column(s) if desired. If None is given, and
#     `header` and `index` are True, then the index names are used. A
#     sequence should be given if the object uses MultiIndex. If
#     False do not print fields for index names. Use index_label=False
#     for easier importing in R.
# mode : str
#     Python write mode, default 'w'.
# encoding : str, optional
#     A string representing the encoding to use in the output file,
#     defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
#     is a non-binary file object.
# compression : str or dict, default 'infer'
#     For on-the-fly compression of the output data. If 'infer' and '%s'
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
#     ``None`` for no compression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for faster compression and to create
#     a reproducible gzip archive:
#     ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
#
#     .. versionchanged:: 1.0.0
#
#        May now be a dict with key 'method' as compression mode
#        and other entries as additional compression options if
#        compression mode is 'zip'.
#
#     .. versionchanged:: 1.1.0
#
#        Passing compression options as keys in dict is
#        supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.
#
#     .. versionchanged:: 1.2.0
#
#         Compression is supported for binary file objects.
#
#     .. versionchanged:: 1.2.0
#
#         Previous versions forwarded dict entries for 'gzip' to
#         `gzip.open` instead of `gzip.GzipFile` which prevented
#         setting `mtime`.
#
# quoting : optional constant from csv module
#     Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
#     then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
#     will treat them as non-numeric.
# quotechar : str, default '\"'
#     String of length 1. Character used to quote fields.
# line_terminator : str, optional
#     The newline character or character sequence to use in the output
#     file. Defaults to `os.linesep`, which depends on the OS in which
#     this method is called (e.g. '\\n' for Linux, '\\r\\n' for Windows).
# chunksize : int or None
#     Rows to write at a time.
# date_format : str, default None
#     Format string for datetime objects.
# doublequote : bool, default True
#     Control quoting of `quotechar` inside a field.
# escapechar : str, default None
#     String of length 1. Character used to escape `sep` and `quotechar`
#     when appropriate.
# decimal : str, default '.'
#     Character recognized as decimal separator. E.g. use ',' for
#     European data.
# errors : str, default 'strict'
#     Specifies how encoding and decoding errors are to be handled.
#     See the errors argument for :func:`open` for a full list
#     of options.
#
#     .. versionadded:: 1.1.0
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2.0
#
# Returns
# -------
# None or str
#     If path_or_buf is None, returns the resulting csv format as a
#     string. Otherwise returns None.
#
# See Also
# --------
# read_csv : Load a CSV file into a DataFrame.
# to_excel : Write DataFrame to an Excel file.
#
# Examples
# --------
# >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
# ...                    'mask': ['red', 'purple'],
# ...                    'weapon': ['sai', 'bo staff']})
# >>> df.to_csv(index=False)
# 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
#
# Create 'out.zip' containing 'out.csv'
#
# >>> compression_opts = dict(method='zip',
# ...                         archive_name='out.csv')  # doctest: +SKIP
# >>> df.to_csv('out.zip', index=False,
# ...           compression=compression_opts)  # doctest: +SKIP
#
# To write a csv file to a new folder or nested folder you will first
# need to create it using either Pathlib or os:
#
# >>> from pathlib import Path  # doctest: +SKIP
# >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
# >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv(filepath)  # doctest: +SKIP
#
# >>> import os  # doctest: +SKIP
# >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
#
# </code>
# <a href='#23'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Persist the prediction table to "Predict.csv" (relative path, so it lands in
# the current working directory). With to_csv's default index=True the
# DataFrame index is written as the first column.
df_result.to_csv("Predict.csv")

# %%
