# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.14.1
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# # Index of ML Operations<a id='top_phases'></a>
# <ul>
# <ul><li><details><summary><h2>Imported Libraries</h2></summary>
# <ul>
#
# <li><b>numpy</b></li>
# <li><b>pandas</b></li>
# <li><b>tensorflow</b></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Visualization</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Data Preparation</span></h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Data Preparation" Calls</u></b></summary>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['male', 0] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexes matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexes; otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement, when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` do not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'errors': 'coerce'}</li></ul>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.array</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 1</u></h3></summary><small><a href=#1>goto cell # 1</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['male', 0] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexes matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexes; otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement, when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` do not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'errors': 'coerce'}</li></ul>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.array</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 4</u></h3></summary><small><a href=#4>goto cell # 4</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> ['male', 0] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexs matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexs otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` do not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'errors': 'coerce'}</li></ul>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy.array</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li>
# <ul><li><details><summary><h4><s>Data Profiling and Exploratory Data Analysis</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Data Cleaning Filtering</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Data Sub-sampling and Train-test Splitting</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Feature Engineering</span></h2></summary>
# <ul>
#
# None
#
# </ul>
# </details></li>
# <ul><li><details><summary><h2>Feature Transformation</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Feature Transformation" Calls</u></b></summary>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 1</u></h3></summary><small><a href=#1>goto cell # 1</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 4</u></h3></summary><small><a href=#4>goto cell # 4</a></small>
# <ul>
#
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Feature Selection</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Model Building and Training</span></h2></summary>
# <ul>
#
# None
#
# </ul>
# </details></li>
# <ul><li><details><summary><h2>Model Training</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Model Training" Calls</u></b></summary>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [[]] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# # Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# # Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# # This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# # Note that you can also omit the `input_shape` argument.
# # In that case the model doesn't have any weights until the first call
# # to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# # model.weights not created yet
#
# # Whereas if you specify the input shape, the model gets built
# # continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# # Returns "4"
#
# # When using the delayed-build pattern (no input shape specified), you can
# # choose to manually build your model by calling
# # `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# # Returns "4"
#
# # Note that when using the delayed-build pattern (no input shape specified),
# # the model gets built the first time you call `fit`, `eval`, or `predict`,
# # or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# # This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'optimizer': 'Adam', 'loss': 'binary_crossentropy', 'metrics': ['accuracy']}</li></ul>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'validation_split': 0.15, 'epochs': 100, 'batch_size': 5, 'callbacks': []}</li></ul>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#        `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 3</u></h3></summary><small><a href=#3>goto cell # 3</a></small>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [1] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (i.e. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [[]] | <b>Kwargs:</b> {}</li></ul>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# # Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# # Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# # This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# # Note that you can also omit the `input_shape` argument.
# # In that case the model doesn't have any weights until the first call
# # to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# # model.weights not created yet
#
# # Whereas if you specify the input shape, the model gets built
# # continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# # Returns "4"
#
# # When using the delayed-build pattern (no input shape specified), you can
# # choose to manually build your model by calling
# # `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# # Returns "4"
#
# # Note that when using the delayed-build pattern (no input shape specified),
# # the model gets built the first time you call `fit`, `eval`, or `predict`,
# # or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# # This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'optimizer': 'Adam', 'loss': 'binary_crossentropy', 'metrics': ['accuracy']}</li></ul>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u> | <b>(See Args)</b> </summary> <ul><li><b>Args:</b> [] | <b>Kwargs:</b> {'validation_split': 0.15, 'epochs': 100, 'batch_size': 5, 'callbacks': []}</li></ul>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (e.g., in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#         `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Model Parameter Tuning</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h2>Model Validation and Assembling</h2></summary>
# <ul>
#
# <li><details><summary><b><u>View All "Model Validation and Assembling" Calls</u></b></summary>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](https://www.tensorflow.org/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
# <li><details open><summary><h3><u>Cell # 4</u></h3></summary><small><a href=#4>goto cell # 4</a></small>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u> | (No Args Found) </summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details></li>
#
# </ul>
# </details></li></ul>
# </ul>
# <hr>
#
# <details><summary><h2>View All ML API Calls in Notebook</h2></summary>
# <ul>
#
# <li> <b>keras</b>
# <ul>
# <li>
# <details><summary><u>keras.api._v2.keras.callbacks</u></summary>
# <blockquote>
# <code>
# Public API for tf.keras.callbacks namespace.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.api._v2.keras.layers</u></summary>
# <blockquote>
# <code>
# Public API for tf.keras.layers namespace.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.callbacks.Callback</u></summary>
# <blockquote>
# <code>
# Abstract base class used to build new callbacks.
#
# Callbacks can be passed to keras methods such as `fit`, `evaluate`, and
# `predict` in order to hook into the various stages of the model training and
# inference lifecycle.
#
# To create a custom callback, subclass `keras.callbacks.Callback` and override
# the method associated with the stage of interest. See
# https://www.tensorflow.org/guide/keras/custom_callback for more information.
#
# Example:
#
# >>> training_finished = False
# >>> class MyCallback(tf.keras.callbacks.Callback):
# ...   def on_train_end(self, logs=None):
# ...     global training_finished
# ...     training_finished = True
# >>> model = tf.keras.Sequential([tf.keras.layers.Dense(1, input_shape=(1,))])
# >>> model.compile(loss='mean_squared_error')
# >>> model.fit(tf.constant([[1.0]]), tf.constant([[1.0]]),
# ...           callbacks=[MyCallback()])
# >>> assert training_finished == True
#
# If you want to use `Callback` objects in a custom training loop:
#
# 1. You should pack all your callbacks into a single `callbacks.CallbackList`
#    so they can all be called together.
# 2. You will need to manually call all the `on_*` methods at the appropriate
#    locations in your loop. Like this:
#
#    ```
#    callbacks =  tf.keras.callbacks.CallbackList([...])
#    callbacks.append(...)
#
#    callbacks.on_train_begin(...)
#    for epoch in range(EPOCHS):
#      callbacks.on_epoch_begin(epoch)
#      for i, data in dataset.enumerate():
#        callbacks.on_train_batch_begin(i)
#        batch_logs = model.train_step(data)
#        callbacks.on_train_batch_end(i, batch_logs)
#      epoch_logs = ...
#      callbacks.on_epoch_end(epoch, epoch_logs)
#    final_logs=...
#    callbacks.on_train_end(final_logs)
#    ```
#
# Attributes:
#     params: Dict. Training parameters
#         (eg. verbosity, batch size, number of epochs...).
#     model: Instance of `keras.models.Model`.
#         Reference of the model being trained.
#
# The `logs` dictionary that callback methods
# take as argument will contain keys for quantities relevant to
# the current batch or epoch (see method-specific docstrings).
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u></summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# Note that you can also omit the `input_shape` argument.
# In that case the model doesn't have any weights until the first call
# to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.weights not created yet
#
# Whereas if you specify the input shape, the model gets built
# continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# Returns "4"
#
# When using the delayed-build pattern (no input shape specified), you can
# choose to manually build your model by calling
# `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# Returns "4"
#
# Note that when using the delayed-build pattern (no input shape specified),
# the model gets built the first time you call `fit`, `eval`, or `predict`,
# or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u></summary>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u></summary>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#         `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled or,
#     2. If `model.fit` is wrapped in a `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u></summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u></summary>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (i.e. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>numpy</b>
# <ul>
# <li>
# <details><summary><u>numpy</u></summary>
# <blockquote>
# <code>
# NumPy
# =====
#
# Provides
#   1. An array object of arbitrary homogeneous items
#   2. Fast mathematical operations over arrays
#   3. Linear Algebra, Fourier Transforms, Random Number Generation
#
# How to use the documentation
# ----------------------------
# Documentation is available in two forms: docstrings provided
# with the code, and a loose standing reference guide, available from
# `the NumPy homepage <https://numpy.org>`_.
#
# We recommend exploring the docstrings using
# `IPython <https://ipython.org>`_, an advanced Python shell with
# TAB-completion and introspection capabilities.  See below for further
# instructions.
#
# The docstring examples assume that `numpy` has been imported as `np`::
#
#   >>> import numpy as np
#
# Code snippets are indicated by three greater-than signs::
#
#   >>> x = 42
#   >>> x = x + 1
#
# Use the built-in ``help`` function to view a function's docstring::
#
#   >>> help(np.sort)
#   ... # doctest: +SKIP
#
# For some objects, ``np.info(obj)`` may provide additional help.  This is
# particularly true if you see the line "Help on ufunc object:" at the top
# of the help() page.  Ufuncs are implemented in C, not Python, for speed.
# The native Python help() does not know how to view their help, but our
# np.info() function does.
#
# To search for documents containing a keyword, do::
#
#   >>> np.lookfor('keyword')
#   ... # doctest: +SKIP
#
# General-purpose documents like a glossary and help on the basic concepts
# of numpy are available under the ``doc`` sub-module::
#
#   >>> from numpy import doc
#   >>> help(doc)
#   ... # doctest: +SKIP
#
# Available subpackages
# ---------------------
# doc
#     Topical documentation on broadcasting, indexing, etc.
# lib
#     Basic functions used by several sub-packages.
# random
#     Core Random Tools
# linalg
#     Core Linear Algebra Tools
# fft
#     Core FFT routines
# polynomial
#     Polynomial tools
# testing
#     NumPy testing tools
# f2py
#     Fortran to Python Interface Generator.
# distutils
#     Enhancements to distutils with support for
#     Fortran compilers and more.
#
# Utilities
# ---------
# test
#     Run numpy unittests
# show_config
#     Show numpy build configuration
# dual
#     Overwrite certain functions with high-performance SciPy tools.
#     Note: `numpy.dual` is deprecated.  Use the functions from NumPy or SciPy
#     directly instead of importing them from `numpy.dual`.
# matlib
#     Make everything matrices.
# __version__
#     NumPy version string
#
# Viewing documentation using IPython
# -----------------------------------
# Start IPython with the NumPy profile (``ipython -p numpy``), which will
# import `numpy` under the alias `np`.  Then, use the ``cpaste`` command to
# paste examples into the shell.  To see which functions are available in
# `numpy`, type ``np.<TAB>`` (where ``<TAB>`` refers to the TAB key), or use
# ``np.*cos*?<ENTER>`` (where ``<ENTER>`` refers to the ENTER key) to narrow
# down the list.  To view the docstring for a function, use
# ``np.cos?<ENTER>`` (to view the docstring) and ``np.cos??<ENTER>`` (to view
# the source code).
#
# Copies vs. in-place operation
# -----------------------------
# Most of the functions in `numpy` return a copy of the array argument
# (e.g., `np.sort`).  In-place versions of these functions are often
# available as array methods, i.e. ``x = np.array([1,2,3]); x.sort()``.
# Exceptions to this rule are documented.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.array</u></summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.core._multiarray_umath.where</u></summary>
# <blockquote>
# <code>
# where(condition, [x, y], /)
#
# Return elements chosen from `x` or `y` depending on `condition`.
#
# .. note::
#     When only `condition` is provided, this function is a shorthand for
#     ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
#     preferred, as it behaves correctly for subclasses. The rest of this
#     documentation covers only the case where all three arguments are
#     provided.
#
# Parameters
# ----------
# condition : array_like, bool
#     Where True, yield `x`, otherwise yield `y`.
# x, y : array_like
#     Values from which to choose. `x`, `y` and `condition` need to be
#     broadcastable to some shape.
#
# Returns
# -------
# out : ndarray
#     An array with elements from `x` where `condition` is True, and elements
#     from `y` elsewhere.
#
# See Also
# --------
# choose
# nonzero : The function that is called when x and y are omitted
#
# Notes
# -----
# If all the arrays are 1-D, `where` is equivalent to::
#
#     [xv if c else yv
#      for c, xv, yv in zip(condition, x, y)]
#
# Examples
# --------
# >>> a = np.arange(10)
# >>> a
# array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# >>> np.where(a < 5, a, 10*a)
# array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
#
# This can be used on multidimensional arrays too:
#
# >>> np.where([[True, False], [True, True]],
# ...          [[1, 2], [3, 4]],
# ...          [[9, 8], [7, 6]])
# array([[1, 8],
#        [3, 4]])
#
# The shapes of x, y, and the condition are broadcast together:
#
# >>> x, y = np.ogrid[:3, :4]
# >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
# array([[10,  0,  0,  0],
#        [10, 11,  1,  1],
#        [10, 11, 12,  2]])
#
# >>> a = np.array([[0, 1, 2],
# ...               [0, 2, 4],
# ...               [0, 3, 6]])
# >>> np.where(a < 4, a, -1)  # -1 is broadcast
# array([[ 0,  1,  2],
#        [ 0,  2, -1],
#        [ 0,  3, -1]])
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>pandas</b>
# <ul>
# <li>
# <details><summary><u>pandas</u></summary>
# <blockquote>
# <code>
# pandas - a powerful data analysis and manipulation library for Python
# =====================================================================
#
# **pandas** is a Python package providing fast, flexible, and expressive data
# structures designed to make working with "relational" or "labeled" data both
# easy and intuitive. It aims to be the fundamental high-level building block for
# doing practical, **real world** data analysis in Python. Additionally, it has
# the broader goal of becoming **the most powerful and flexible open source data
# analysis / manipulation tool available in any language**. It is already well on
# its way toward this goal.
#
# Main Features
# -------------
# Here are just a few of the things that pandas does well:
#
#   - Easy handling of missing data in floating point as well as non-floating
#     point data.
#   - Size mutability: columns can be inserted and deleted from DataFrame and
#     higher dimensional objects
#   - Automatic and explicit data alignment: objects can be explicitly aligned
#     to a set of labels, or the user can simply ignore the labels and let
#     `Series`, `DataFrame`, etc. automatically align the data for you in
#     computations.
#   - Powerful, flexible group by functionality to perform split-apply-combine
#     operations on data sets, for both aggregating and transforming data.
#   - Make it easy to convert ragged, differently-indexed data in other Python
#     and NumPy data structures into DataFrame objects.
#   - Intelligent label-based slicing, fancy indexing, and subsetting of large
#     data sets.
#   - Intuitive merging and joining data sets.
#   - Flexible reshaping and pivoting of data sets.
#   - Hierarchical labeling of axes (possible to have multiple labels per tick).
#   - Robust IO tools for loading data from flat files (CSV and delimited),
#     Excel files, databases, and saving/loading data from the ultrafast HDF5
#     format.
#   - Time series-specific functionality: date range generation and frequency
#     conversion, moving window statistics, date shifting and lagging.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame</u></summary>
# <blockquote>
# <code>
# Two-dimensional, size-mutable, potentially heterogeneous tabular data.
#
# Data structure also contains labeled axes (rows and columns).
# Arithmetic operations align on both row and column labels. Can be
# thought of as a dict-like container for Series objects. The primary
# pandas data structure.
#
# Parameters
# ----------
# data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
#     Dict can contain Series, arrays, constants, dataclass or list-like objects. If
#     data is a dict, column order follows insertion-order. If a dict contains Series
#     which have an index defined, it is aligned by its index.
#
#     .. versionchanged:: 0.25.0
#        If data is a list of dicts, column order follows insertion-order.
#
# index : Index or array-like
#     Index to use for resulting frame. Will default to RangeIndex if
#     no indexing information part of input data and no index provided.
# columns : Index or array-like
#     Column labels to use for resulting frame when data does not have them,
#     defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
#     will perform column selection instead.
# dtype : dtype, default None
#     Data type to force. Only a single dtype is allowed. If None, infer.
# copy : bool or None, default None
#     Copy data from inputs.
#     For dict data, the default of None behaves like ``copy=True``.  For DataFrame
#     or 2d ndarray input, the default of None behaves like ``copy=False``.
#
#     .. versionchanged:: 1.3.0
#
# See Also
# --------
# DataFrame.from_records : Constructor from tuples, also record arrays.
# DataFrame.from_dict : From dicts of Series, arrays, or dicts.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_table : Read general delimited file into DataFrame.
# read_clipboard : Read text from clipboard into DataFrame.
#
# Examples
# --------
# Constructing DataFrame from a dictionary.
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df
#    col1  col2
# 0     1     3
# 1     2     4
#
# Notice that the inferred dtype is int64.
#
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# To enforce a single dtype:
#
# >>> df = pd.DataFrame(data=d, dtype=np.int8)
# >>> df.dtypes
# col1    int8
# col2    int8
# dtype: object
#
# Constructing DataFrame from a dictionary including Series:
#
# >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
# >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
#    col1  col2
# 0     0   NaN
# 1     1   NaN
# 2     2   2.0
# 3     3   3.0
#
# Constructing DataFrame from numpy ndarray:
#
# >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
# ...                    columns=['a', 'b', 'c'])
# >>> df2
#    a  b  c
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
#
# Constructing DataFrame from a numpy ndarray that has labeled columns:
#
# >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
# ...                 dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")])
# >>> df3 = pd.DataFrame(data, columns=['c', 'a'])
# ...
# >>> df3
#    c  a
# 0  3  1
# 1  6  4
# 2  9  7
#
# Constructing DataFrame from dataclass:
#
# >>> from dataclasses import make_dataclass
# >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
# >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
#    x  y
# 0  0  0
# 1  0  3
# 2  2  3
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u></summary>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexes matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexes, otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` does not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u></summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.to_csv</u></summary>
# <blockquote>
# <code>
# Write object to a comma-separated values (csv) file.
#
# Parameters
# ----------
# path_or_buf : str, path object, file-like object, or None, default None
#     String, path object (implementing os.PathLike[str]), or file-like
#     object implementing a write() function. If None, the result is
#     returned as a string. If a non-binary file object is passed, it should
#     be opened with `newline=''`, disabling universal newlines. If a binary
#     file object is passed, `mode` might need to contain a `'b'`.
#
#     .. versionchanged:: 1.2.0
#
#        Support for binary file objects was introduced.
#
# sep : str, default ','
#     String of length 1. Field delimiter for the output file.
# na_rep : str, default ''
#     Missing data representation.
# float_format : str, default None
#     Format string for floating point numbers.
# columns : sequence, optional
#     Columns to write.
# header : bool or list of str, default True
#     Write out the column names. If a list of strings is given it is
#     assumed to be aliases for the column names.
# index : bool, default True
#     Write row names (index).
# index_label : str or sequence, or False, default None
#     Column label for index column(s) if desired. If None is given, and
#     `header` and `index` are True, then the index names are used. A
#     sequence should be given if the object uses MultiIndex. If
#     False do not print fields for index names. Use index_label=False
#     for easier importing in R.
# mode : str
#     Python write mode, default 'w'.
# encoding : str, optional
#     A string representing the encoding to use in the output file,
#     defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
#     is a non-binary file object.
# compression : str or dict, default 'infer'
#     For on-the-fly compression of the output data. If 'infer' and `path_or_buf`
#     is path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
#     ``None`` for no compression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdCompressor``, respectively. As an
#     example, the following could be passed for faster compression and to create
#     a reproducible gzip archive:
#     ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
#
#     .. versionchanged:: 1.0.0
#
#        May now be a dict with key 'method' as compression mode
#        and other entries as additional compression options if
#        compression mode is 'zip'.
#
#     .. versionchanged:: 1.1.0
#
#        Passing compression options as keys in dict is
#        supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.
#
#     .. versionchanged:: 1.2.0
#
#         Compression is supported for binary file objects.
#
#     .. versionchanged:: 1.2.0
#
#         Previous versions forwarded dict entries for 'gzip' to
#         `gzip.open` instead of `gzip.GzipFile` which prevented
#         setting `mtime`.
#
# quoting : optional constant from csv module
#     Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
#     then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
#     will treat them as non-numeric.
# quotechar : str, default '\"'
#     String of length 1. Character used to quote fields.
# line_terminator : str, optional
#     The newline character or character sequence to use in the output
#     file. Defaults to `os.linesep`, which depends on the OS in which
#     this method is called (e.g. '\\n' for Linux, '\\r\\n' for Windows).
# chunksize : int or None
#     Rows to write at a time.
# date_format : str, default None
#     Format string for datetime objects.
# doublequote : bool, default True
#     Control quoting of `quotechar` inside a field.
# escapechar : str, default None
#     String of length 1. Character used to escape `sep` and `quotechar`
#     when appropriate.
# decimal : str, default '.'
#     Character recognized as decimal separator. E.g. use ',' for
#     European data.
# errors : str, default 'strict'
#     Specifies how encoding and decoding errors are to be handled.
#     See the errors argument for :func:`open` for a full list
#     of options.
#
#     .. versionadded:: 1.1.0
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2.0
#
# Returns
# -------
# None or str
#     If path_or_buf is None, returns the resulting csv format as a
#     string. Otherwise returns None.
#
# See Also
# --------
# read_csv : Load a CSV file into a DataFrame.
# to_excel : Write DataFrame to an Excel file.
#
# Examples
# --------
# >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
# ...                    'mask': ['red', 'purple'],
# ...                    'weapon': ['sai', 'bo staff']})
# >>> df.to_csv(index=False)
# 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
#
# Create 'out.zip' containing 'out.csv'
#
# >>> compression_opts = dict(method='zip',
# ...                         archive_name='out.csv')  # doctest: +SKIP
# >>> df.to_csv('out.zip', index=False,
# ...           compression=compression_opts)  # doctest: +SKIP
#
# To write a csv file to a new folder or nested folder you will first
# need to create it using either Pathlib or os:
#
# >>> from pathlib import Path  # doctest: +SKIP
# >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
# >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv(filepath)  # doctest: +SKIP
#
# >>> import os  # doctest: +SKIP
# >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.series.Series</u></summary>
# <blockquote>
# <code>
# One-dimensional ndarray with axis labels (including time series).
#
# Labels need not be unique but must be a hashable type. The object
# supports both integer- and label-based indexing and provides a host of
# methods for performing operations involving the index. Statistical
# methods from ndarray have been overridden to automatically exclude
# missing data (currently represented as NaN).
#
# Operations between Series (+, -, /, \*, \*\*) align values based on their
# associated index values-- they need not be the same length. The result
# index will be the sorted union of the two indexes.
#
# Parameters
# ----------
# data : array-like, Iterable, dict, or scalar value
#     Contains data stored in Series. If data is a dict, argument order is
#     maintained.
# index : array-like or Index (1d)
#     Values must be hashable and have the same length as `data`.
#     Non-unique index values are allowed. Will default to
#     RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
#     and index is None, then the keys in the data are used as the index. If the
#     index is not None, the resulting Series is reindexed with the index values.
# dtype : str, numpy.dtype, or ExtensionDtype, optional
#     Data type for the output Series. If not specified, this will be
#     inferred from `data`.
#     See the :ref:`user guide <basics.dtypes>` for more usages.
# name : str, optional
#     The name to give to the Series.
# copy : bool, default False
#     Copy input data. Only affects Series or 1d ndarray input. See examples.
#
# Examples
# --------
# Constructing Series from a dictionary with an Index specified
#
# >>> d = {'a': 1, 'b': 2, 'c': 3}
# >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
# >>> ser
# a   1
# b   2
# c   3
# dtype: int64
#
# The keys of the dictionary match with the Index values, hence the Index
# values have no effect.
#
# >>> d = {'a': 1, 'b': 2, 'c': 3}
# >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
# >>> ser
# x   NaN
# y   NaN
# z   NaN
# dtype: float64
#
# Note that the Index is first built with the keys from the dictionary.
# After this the Series is reindexed with the given Index values, hence we
# get all NaN as a result.
#
# Constructing Series from a list with `copy=False`.
#
# >>> r = [1, 2]
# >>> ser = pd.Series(r, copy=False)
# >>> ser.iloc[0] = 999
# >>> r
# [1, 2]
# >>> ser
# 0    999
# 1      2
# dtype: int64
#
# Due to input data type the Series has a `copy` of
# the original data even though `copy=False`, so
# the data is unchanged.
#
# Constructing Series from a 1d ndarray with `copy=False`.
#
# >>> r = np.array([1, 2])
# >>> ser = pd.Series(r, copy=False)
# >>> ser.iloc[0] = 999
# >>> r
# array([999,   2])
# >>> ser
# 0    999
# 1      2
# dtype: int64
#
# Due to input data type the Series has a `view` on
# the original data, so
# the data is changed as well.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u></summary>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.io.parsers.readers.read_csv</u></summary>
# <blockquote>
# <code>
# Read a comma-separated values (csv) file into DataFrame.
#
# Also supports optionally iterating over the file or breaking it
# into chunks.
#
# Additional help can be found in the online docs for
# `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
#
# Parameters
# ----------
# filepath_or_buffer : str, path object or file-like object
#     Any valid string path is acceptable. The string could be a URL. Valid
#     URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
#     expected. A local file could be: file://localhost/path/to/table.csv.
#
#     If you want to pass in a path object, pandas accepts any ``os.PathLike``.
#
#     By file-like object, we refer to objects with a ``read()`` method, such as
#     a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
# sep : str, default ','
#     Delimiter to use. If sep is None, the C engine cannot automatically detect
#     the separator, but the Python parsing engine can, meaning the latter will
#     be used and automatically detect the separator by Python's builtin sniffer
#     tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
#     different from ``'\s+'`` will be interpreted as regular expressions and
#     will also force the use of the Python parsing engine. Note that regex
#     delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
# delimiter : str, default ``None``
#     Alias for sep.
# header : int, list of int, None, default 'infer'
#     Row number(s) to use as the column names, and the start of the
#     data.  Default behavior is to infer the column names: if no names
#     are passed the behavior is identical to ``header=0`` and column
#     names are inferred from the first line of the file, if column
#     names are passed explicitly then the behavior is identical to
#     ``header=None``. Explicitly pass ``header=0`` to be able to
#     replace existing names. The header can be a list of integers that
#     specify row locations for a multi-index on the columns
#     e.g. [0,1,3]. Intervening rows that are not specified will be
#     skipped (e.g. 2 in this example is skipped). Note that this
#     parameter ignores commented lines and empty lines if
#     ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
#     data rather than the first line of the file.
# names : array-like, optional
#     List of column names to use. If the file contains a header row,
#     then you should explicitly pass ``header=0`` to override the column names.
#     Duplicates in this list are not allowed.
# index_col : int, str, sequence of int / str, or False, optional, default ``None``
#     Column(s) to use as the row labels of the ``DataFrame``, either given as
#     string name or column index. If a sequence of int / str is given, a
#     MultiIndex is used.
#
#     Note: ``index_col=False`` can be used to force pandas to *not* use the first
#     column as the index, e.g. when you have a malformed file with delimiters at
#     the end of each line.
# usecols : list-like or callable, optional
#     Return a subset of the columns. If list-like, all elements must either
#     be positional (i.e. integer indices into the document columns) or strings
#     that correspond to column names provided either by the user in `names` or
#     inferred from the document header row(s). If ``names`` are given, the document
#     header row(s) are not taken into account. For example, a valid list-like
#     `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
#     Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
#     To instantiate a DataFrame from ``data`` with element order preserved use
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
#     in ``['foo', 'bar']`` order or
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
#     for ``['bar', 'foo']`` order.
#
#     If callable, the callable function will be evaluated against the column
#     names, returning names where the callable function evaluates to True. An
#     example of a valid callable argument would be ``lambda x: x.upper() in
#     ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
#     parsing time and lower memory usage.
# squeeze : bool, default False
#     If the parsed data only contains one column then return a Series.
#
#     .. deprecated:: 1.4.0
#         Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze
#         the data.
# prefix : str, optional
#     Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
#
#     .. deprecated:: 1.4.0
#        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
# mangle_dupe_cols : bool, default True
#     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
#     'X'...'X'. Passing in False will cause data to be overwritten if there
#     are duplicate names in the columns.
# dtype : Type name or dict of column -> type, optional
#     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,
#     'c': 'Int64'}
#     Use `str` or `object` together with suitable `na_values` settings
#     to preserve and not interpret dtype.
#     If converters are specified, they will be applied INSTEAD
#     of dtype conversion.
# engine : {'c', 'python', 'pyarrow'}, optional
#     Parser engine to use. The C and pyarrow engines are faster, while the python engine
#     is currently more feature-complete. Multithreading is currently only supported by
#     the pyarrow engine.
#
#     .. versionadded:: 1.4.0
#
#         The "pyarrow" engine was added as an *experimental* engine, and some features
#         are unsupported, or may not work correctly, with this engine.
# converters : dict, optional
#     Dict of functions for converting values in certain columns. Keys can either
#     be integers or column labels.
# true_values : list, optional
#     Values to consider as True.
# false_values : list, optional
#     Values to consider as False.
# skipinitialspace : bool, default False
#     Skip spaces after delimiter.
# skiprows : list-like, int or callable, optional
#     Line numbers to skip (0-indexed) or number of lines to skip (int)
#     at the start of the file.
#
#     If callable, the callable function will be evaluated against the row
#     indices, returning True if the row should be skipped and False otherwise.
#     An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
# skipfooter : int, default 0
#     Number of lines at bottom of file to skip (Unsupported with engine='c').
# nrows : int, optional
#     Number of rows of file to read. Useful for reading pieces of large files.
# na_values : scalar, str, list-like, or dict, optional
#     Additional strings to recognize as NA/NaN. If dict passed, specific
#     per-column NA values.  By default the following values are interpreted as
#     NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
#     '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
#     'nan', 'null'.
# keep_default_na : bool, default True
#     Whether or not to include the default NaN values when parsing the data.
#     Depending on whether `na_values` is passed in, the behavior is as follows:
#
#     * If `keep_default_na` is True, and `na_values` are specified, `na_values`
#       is appended to the default NaN values used for parsing.
#     * If `keep_default_na` is True, and `na_values` are not specified, only
#       the default NaN values are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are specified, only
#       the NaN values specified in `na_values` are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are not specified, no
#       strings will be parsed as NaN.
#
#     Note that if `na_filter` is passed in as False, the `keep_default_na` and
#     `na_values` parameters will be ignored.
# na_filter : bool, default True
#     Detect missing value markers (empty strings and the value of na_values). In
#     data without any NAs, passing na_filter=False can improve the performance
#     of reading a large file.
# verbose : bool, default False
#     Indicate number of NA values placed in non-numeric columns.
# skip_blank_lines : bool, default True
#     If True, skip over blank lines rather than interpreting as NaN values.
# parse_dates : bool or list of int or names or list of lists or dict, default False
#     The behavior is as follows:
#
#     * boolean. If True -> try parsing the index.
#     * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
#       each as a separate date column.
#     * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
#       a single date column.
#     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
#       result 'foo'
#
#     If a column or index cannot be represented as an array of datetimes,
#     say because of an unparsable value or a mixture of timezones, the column
#     or index will be returned unaltered as an object data type. For
#     non-standard datetime parsing, use ``pd.to_datetime`` after
#     ``pd.read_csv``. To parse an index or column with a mixture of timezones,
#     specify ``date_parser`` to be a partially-applied
#     :func:`pandas.to_datetime` with ``utc=True``. See
#     :ref:`io.csv.mixed_timezones` for more.
#
#     Note: A fast-path exists for iso8601-formatted dates.
# infer_datetime_format : bool, default False
#     If True and `parse_dates` is enabled, pandas will attempt to infer the
#     format of the datetime strings in the columns, and if it can be inferred,
#     switch to a faster method of parsing them. In some cases this can increase
#     the parsing speed by 5-10x.
# keep_date_col : bool, default False
#     If True and `parse_dates` specifies combining multiple columns then
#     keep the original columns.
# date_parser : function, optional
#     Function to use for converting a sequence of string columns to an array of
#     datetime instances. The default uses ``dateutil.parser.parser`` to do the
#     conversion. Pandas will try to call `date_parser` in three different ways,
#     advancing to the next if an exception occurs: 1) Pass one or more arrays
#     (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
#     string values from the columns defined by `parse_dates` into a single array
#     and pass that; and 3) call `date_parser` once for each row using one or
#     more strings (corresponding to the columns defined by `parse_dates`) as
#     arguments.
# dayfirst : bool, default False
#     DD/MM format dates, international and European format.
# cache_dates : bool, default True
#     If True, use a cache of unique, converted dates to apply the datetime
#     conversion. May produce significant speed-up when parsing duplicate
#     date strings, especially ones with timezone offsets.
#
#     .. versionadded:: 0.25.0
# iterator : bool, default False
#     Return TextFileReader object for iteration or getting chunks with
#     ``get_chunk()``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# chunksize : int, optional
#     Return TextFileReader object for iteration.
#     See the `IO Tools docs
#     <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
#     for more information on ``iterator`` and ``chunksize``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# compression : str or dict, default 'infer'
#     For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
#     'zip', the ZIP file must contain only one data file to be read in. Set to
#     ``None`` for no decompression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for Zstandard decompression using a
#     custom compression dictionary:
#     ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
#
#     .. versionchanged:: 1.4.0 Zstandard support.
#
# thousands : str, optional
#     Thousands separator.
# decimal : str, default '.'
#     Character to recognize as decimal point (e.g. use ',' for European data).
# lineterminator : str (length 1), optional
#     Character to break file into lines. Only valid with C parser.
# quotechar : str (length 1), optional
#     The character used to denote the start and end of a quoted item. Quoted
#     items can include the delimiter and it will be ignored.
# quoting : int or csv.QUOTE_* instance, default 0
#     Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
#     QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
# doublequote : bool, default ``True``
#     When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
#     whether or not to interpret two consecutive quotechar elements INSIDE a
#     field as a single ``quotechar`` element.
# escapechar : str (length 1), optional
#     One-character string used to escape other characters.
# comment : str, optional
#     Indicates remainder of line should not be parsed. If found at the beginning
#     of a line, the line will be ignored altogether. This parameter must be a
#     single character. Like empty lines (as long as ``skip_blank_lines=True``),
#     fully commented lines are ignored by the parameter `header` but not by
#     `skiprows`. For example, if ``comment='#'``, parsing
#     ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
#     treated as the header.
# encoding : str, optional
#     Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
#     standard encodings
#     <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
#
#     .. versionchanged:: 1.2
#
#        When ``encoding`` is ``None``, ``errors="replace"`` is passed to
#        ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
#        This behavior was previously only the case for ``engine="python"``.
#
#     .. versionchanged:: 1.3.0
#
#        ``encoding_errors`` is a new argument. ``encoding`` no longer has an
#        influence on how encoding errors are handled.
#
# encoding_errors : str, optional, default "strict"
#     How encoding errors are treated. `List of possible values
#     <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
#
#     .. versionadded:: 1.3.0
#
# dialect : str or csv.Dialect, optional
#     If provided, this parameter will override values (default or not) for the
#     following parameters: `delimiter`, `doublequote`, `escapechar`,
#     `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
#     override values, a ParserWarning will be issued. See csv.Dialect
#     documentation for more details.
# error_bad_lines : bool, optional, default ``None``
#     Lines with too many fields (e.g. a csv line with too many commas) will by
#     default cause an exception to be raised, and no DataFrame will be returned.
#     If False, then these "bad lines" will be dropped from the DataFrame that is
#     returned.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# warn_bad_lines : bool, optional, default ``None``
#     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
#     "bad line" will be output.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# on_bad_lines : {'error', 'warn', 'skip'} or callable, default 'error'
#     Specifies what to do upon encountering a bad line (a line with too many fields).
#     Allowed values are:
#
#         - 'error', raise an Exception when a bad line is encountered.
#         - 'warn', raise a warning when a bad line is encountered and skip that line.
#         - 'skip', skip bad lines without raising or warning when they are encountered.
#
#     .. versionadded:: 1.3.0
#
#         - callable, function with signature
#           ``(bad_line: list[str]) -> list[str] | None`` that will process a single
#           bad line. ``bad_line`` is a list of strings split by the ``sep``.
#           If the function returns ``None``, the bad line will be ignored.
#           If the function returns a new list of strings with more elements than
#           expected, a ``ParserWarning`` will be emitted while dropping extra elements.
#           Only supported when ``engine="python"``
#
#     .. versionadded:: 1.4.0
#
# delim_whitespace : bool, default False
#     Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
#     used as the sep. Equivalent to setting ``sep='\s+'``. If this option
#     is set to True, nothing should be passed in for the ``delimiter``
#     parameter.
# low_memory : bool, default True
#     Internally process the file in chunks, resulting in lower memory use
#     while parsing, but possibly mixed type inference.  To ensure no mixed
#     types either set False, or specify the type with the `dtype` parameter.
#     Note that the entire file is read into a single DataFrame regardless,
#     use the `chunksize` or `iterator` parameter to return the data in chunks.
#     (Only valid with C parser).
# memory_map : bool, default False
#     If a filepath is provided for `filepath_or_buffer`, map the file object
#     directly onto memory and access the data directly from there. Using this
#     option can improve performance because there is no longer any I/O overhead.
# float_precision : str, optional
#     Specifies which converter the C engine should use for floating-point
#     values. The options are ``None`` or 'high' for the ordinary converter,
#     'legacy' for the original lower precision pandas converter, and
#     'round_trip' for the round-trip converter.
#
#     .. versionchanged:: 1.2
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2
#
# Returns
# -------
# DataFrame or TextParser
#     A comma-separated values (csv) file is returned as two-dimensional
#     data structure with labeled axes.
#
# See Also
# --------
# DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_fwf : Read a table of fixed-width formatted lines into DataFrame.
#
# Examples
# --------
# >>> pd.read_csv('data.csv')  # doctest: +SKIP
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <b>tensorflow</b>
# <ul>
# <li>
# <details><summary><u>tensorflow</u></summary>
# <blockquote>
# <code>
# Top-level module of TensorFlow. By convention, we refer to this module as
# `tf` instead of `tensorflow`, following the common practice of importing
# TensorFlow via the command `import tensorflow as tf`.
#
# The primary function of this module is to import all of the public TensorFlow
# interfaces into a single place. The interfaces themselves are located in
# sub-modules, as described below.
#
# Note that the file `__init__.py` in the TensorFlow source code tree is actually
# only a placeholder to enable test cases to run. The TensorFlow build replaces
# this file with a file generated from [`api_template.__init__.py`](https://www.github.com/tensorflow/tensorflow/blob/master/tensorflow/api_template.__init__.py)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>tensorflow._api.v2.nn</u></summary>
# <blockquote>
# <code>
# Primitive Neural Net (NN) Operations.
#
# # Notes on padding
#
# Several neural network operations, such as `tf.nn.conv2d` and
# `tf.nn.max_pool2d`, take a `padding` parameter, which controls how the input is
# padded before running the operation. The input is padded by inserting values
# (typically zeros) before and after the tensor in each spatial dimension. The
# `padding` parameter can either be the string `'VALID'`, which means use no
# padding, or `'SAME'` which adds padding according to a formula which is
# described below. Certain ops also allow the amount of padding per dimension to
# be explicitly specified by passing a list to `padding`.
#
# In the case of convolutions, the input is padded with zeros. In case of pools,
# the padded input values are ignored. For example, in a max pool, the sliding
# window ignores padded values, which is equivalent to the padded values being
# `-infinity`.
#
# ## `'VALID'` padding
#
# Passing `padding='VALID'` to an op causes no padding to be used. This causes the
# output size to typically be smaller than the input size, even when the stride is
# one. In the 2D case, the output size is computed as:
#
# ```
# out_height = ceil((in_height - filter_height + 1) / stride_height)
# out_width  = ceil((in_width - filter_width + 1) / stride_width)
# ```
#
# The 1D and 3D cases are similar. Note `filter_height` and `filter_width` refer
# to the filter size after dilations (if any) for convolutions, and refer to the
# window size for pools.
#
# ## `'SAME'` padding
#
# With `'SAME'` padding, padding is applied to each spatial dimension. When the
# strides are 1, the input is padded such that the output size is the same as the
# input size. In the 2D case, the output size is computed as:
#
# ```
# out_height = ceil(in_height / stride_height)
# out_width  = ceil(in_width / stride_width)
# ```
#
# The amount of padding used is the smallest amount that results in the output
# size. The formula for the total amount of padding per dimension is:
#
# ```
# if (in_height % strides[1] == 0):
#   pad_along_height = max(filter_height - stride_height, 0)
# else:
#   pad_along_height = max(filter_height - (in_height % stride_height), 0)
# if (in_width % strides[2] == 0):
#   pad_along_width = max(filter_width - stride_width, 0)
# else:
#   pad_along_width = max(filter_width - (in_width % stride_width), 0)
# ```
#
# Finally, the padding on the top, bottom, left and right are:
#
# ```
# pad_top = pad_along_height // 2
# pad_bottom = pad_along_height - pad_top
# pad_left = pad_along_width // 2
# pad_right = pad_along_width - pad_left
# ```
#
# Note that the division by 2 means that there might be cases when the padding on
# both sides (top vs bottom, right vs left) are off by one. In this case, the
# bottom and right sides always get the one additional padded pixel. For example,
# when pad_along_height is 5, we pad 2 pixels at the top and 3 pixels at the
# bottom. Note that this is different from existing libraries such as PyTorch and
# Caffe, which explicitly specify the number of padded pixels and always pad the
# same number of pixels on both sides.
#
# Here is an example of `'SAME'` padding:
#
# >>> in_height = 5
# >>> filter_height = 3
# >>> stride_height = 2
# >>>
# >>> in_width = 2
# >>> filter_width = 2
# >>> stride_width = 1
# >>>
# >>> inp = tf.ones((2, in_height, in_width, 2))
# >>> filter = tf.ones((filter_height, filter_width, 2, 2))
# >>> strides = [stride_height, stride_width]
# >>> output = tf.nn.conv2d(inp, filter, strides, padding='SAME')
# >>> output.shape[1]  # output_height: ceil(5 / 2)
# 3
# >>> output.shape[2] # output_width: ceil(2 / 1)
# 2
#
# ## Explicit padding
#
# Certain ops, like `tf.nn.conv2d`, also allow a list of explicit padding amounts
# to be passed to the `padding` parameter. This list is in the same format as what
# is passed to `tf.pad`, except the padding must be a nested list, not a tensor.
# For example, in the 2D case, the list is in the format `[[0, 0], [pad_top,
# pad_bottom], [pad_left, pad_right], [0, 0]]` when `data_format` is its default
# value of `'NHWC'`. The two `[0, 0]` pairs indicate the batch and channel
# dimensions have no padding, which is required, as only spatial dimensions can
# have padding.
#
# For example:
#
# >>> inp = tf.ones((1, 3, 3, 1))
# >>> filter = tf.ones((2, 2, 1, 1))
# >>> strides = [1, 1]
# >>> padding = [[0, 0], [1, 2], [0, 1], [0, 0]]
# >>> output = tf.nn.conv2d(inp, filter, strides, padding=padding)
# >>> tuple(output.shape)
# (1, 5, 3, 1)
# >>> # Equivalently, tf.pad can be used, since convolutions pad with zeros.
# >>> inp = tf.pad(inp, padding)
# >>> # 'VALID' means to use no padding in conv2d (we already padded inp)
# >>> output2 = tf.nn.conv2d(inp, filter, strides, padding='VALID')
# >>> tf.debugging.assert_equal(output, output2)
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>tensorflow.keras</u></summary>
# <blockquote>
# <code>
# Public API for tf.keras namespace.
#
# </code>
# <a href='#top_phases'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>1. Data Preparation | Feature Engineering | Library Loading</h1>  <a id='1'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.io.parsers.readers.read_csv</u></summary>
# <blockquote>
# <code>
# Read a comma-separated values (csv) file into DataFrame.
#
# Also supports optionally iterating or breaking of the file
# into chunks.
#
# Additional help can be found in the online docs for
# `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
#
# Parameters
# ----------
# filepath_or_buffer : str, path object or file-like object
#     Any valid string path is acceptable. The string could be a URL. Valid
#     URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
#     expected. A local file could be: file://localhost/path/to/table.csv.
#
#     If you want to pass in a path object, pandas accepts any ``os.PathLike``.
#
#     By file-like object, we refer to objects with a ``read()`` method, such as
#     a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
# sep : str, default ','
#     Delimiter to use. If sep is None, the C engine cannot automatically detect
#     the separator, but the Python parsing engine can, meaning the latter will
#     be used and automatically detect the separator by Python's builtin sniffer
#     tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
#     different from ``'\s+'`` will be interpreted as regular expressions and
#     will also force the use of the Python parsing engine. Note that regex
#     delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
# delimiter : str, default ``None``
#     Alias for sep.
# header : int, list of int, None, default 'infer'
#     Row number(s) to use as the column names, and the start of the
#     data.  Default behavior is to infer the column names: if no names
#     are passed the behavior is identical to ``header=0`` and column
#     names are inferred from the first line of the file, if column
#     names are passed explicitly then the behavior is identical to
#     ``header=None``. Explicitly pass ``header=0`` to be able to
#     replace existing names. The header can be a list of integers that
#     specify row locations for a multi-index on the columns
#     e.g. [0,1,3]. Intervening rows that are not specified will be
#     skipped (e.g. 2 in this example is skipped). Note that this
#     parameter ignores commented lines and empty lines if
#     ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
#     data rather than the first line of the file.
# names : array-like, optional
#     List of column names to use. If the file contains a header row,
#     then you should explicitly pass ``header=0`` to override the column names.
#     Duplicates in this list are not allowed.
# index_col : int, str, sequence of int / str, or False, optional, default ``None``
#   Column(s) to use as the row labels of the ``DataFrame``, either given as
#   string name or column index. If a sequence of int / str is given, a
#   MultiIndex is used.
#
#   Note: ``index_col=False`` can be used to force pandas to *not* use the first
#   column as the index, e.g. when you have a malformed file with delimiters at
#   the end of each line.
# usecols : list-like or callable, optional
#     Return a subset of the columns. If list-like, all elements must either
#     be positional (i.e. integer indices into the document columns) or strings
#     that correspond to column names provided either by the user in `names` or
#     inferred from the document header row(s). If ``names`` are given, the document
#     header row(s) are not taken into account. For example, a valid list-like
#     `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
#     Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
#     To instantiate a DataFrame from ``data`` with element order preserved use
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
#     in ``['foo', 'bar']`` order or
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
#     for ``['bar', 'foo']`` order.
#
#     If callable, the callable function will be evaluated against the column
#     names, returning names where the callable function evaluates to True. An
#     example of a valid callable argument would be ``lambda x: x.upper() in
#     ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
#     parsing time and lower memory usage.
# squeeze : bool, default False
#     If the parsed data only contains one column then return a Series.
#
#     .. deprecated:: 1.4.0
#         Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze
#         the data.
# prefix : str, optional
#     Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
#
#     .. deprecated:: 1.4.0
#        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
# mangle_dupe_cols : bool, default True
#     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
#     'X'...'X'. Passing in False will cause data to be overwritten if there
#     are duplicate names in the columns.
# dtype : Type name or dict of column -> type, optional
#     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,
#     'c': 'Int64'}
#     Use `str` or `object` together with suitable `na_values` settings
#     to preserve and not interpret dtype.
#     If converters are specified, they will be applied INSTEAD
#     of dtype conversion.
# engine : {'c', 'python', 'pyarrow'}, optional
#     Parser engine to use. The C and pyarrow engines are faster, while the python engine
#     is currently more feature-complete. Multithreading is currently only supported by
#     the pyarrow engine.
#
#     .. versionadded:: 1.4.0
#
#         The "pyarrow" engine was added as an *experimental* engine, and some features
#         are unsupported, or may not work correctly, with this engine.
# converters : dict, optional
#     Dict of functions for converting values in certain columns. Keys can either
#     be integers or column labels.
# true_values : list, optional
#     Values to consider as True.
# false_values : list, optional
#     Values to consider as False.
# skipinitialspace : bool, default False
#     Skip spaces after delimiter.
# skiprows : list-like, int or callable, optional
#     Line numbers to skip (0-indexed) or number of lines to skip (int)
#     at the start of the file.
#
#     If callable, the callable function will be evaluated against the row
#     indices, returning True if the row should be skipped and False otherwise.
#     An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
# skipfooter : int, default 0
#     Number of lines at bottom of file to skip (Unsupported with engine='c').
# nrows : int, optional
#     Number of rows of file to read. Useful for reading pieces of large files.
# na_values : scalar, str, list-like, or dict, optional
#     Additional strings to recognize as NA/NaN. If dict passed, specific
#     per-column NA values.  By default the following values are interpreted as
#     NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
#     '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
#     'nan', 'null'.
# keep_default_na : bool, default True
#     Whether or not to include the default NaN values when parsing the data.
#     Depending on whether `na_values` is passed in, the behavior is as follows:
#
#     * If `keep_default_na` is True, and `na_values` are specified, `na_values`
#       is appended to the default NaN values used for parsing.
#     * If `keep_default_na` is True, and `na_values` are not specified, only
#       the default NaN values are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are specified, only
#       the NaN values specified in `na_values` are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are not specified, no
#       strings will be parsed as NaN.
#
#     Note that if `na_filter` is passed in as False, the `keep_default_na` and
#     `na_values` parameters will be ignored.
# na_filter : bool, default True
#     Detect missing value markers (empty strings and the value of na_values). In
#     data without any NAs, passing na_filter=False can improve the performance
#     of reading a large file.
# verbose : bool, default False
#     Indicate number of NA values placed in non-numeric columns.
# skip_blank_lines : bool, default True
#     If True, skip over blank lines rather than interpreting as NaN values.
# parse_dates : bool or list of int or names or list of lists or dict, default False
#     The behavior is as follows:
#
#     * boolean. If True -> try parsing the index.
#     * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
#       each as a separate date column.
#     * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
#       a single date column.
#     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
#       result 'foo'
#
#     If a column or index cannot be represented as an array of datetimes,
#     say because of an unparsable value or a mixture of timezones, the column
#     or index will be returned unaltered as an object data type. For
#     non-standard datetime parsing, use ``pd.to_datetime`` after
#     ``pd.read_csv``. To parse an index or column with a mixture of timezones,
#     specify ``date_parser`` to be a partially-applied
#     :func:`pandas.to_datetime` with ``utc=True``. See
#     :ref:`io.csv.mixed_timezones` for more.
#
#     Note: A fast-path exists for iso8601-formatted dates.
# infer_datetime_format : bool, default False
#     If True and `parse_dates` is enabled, pandas will attempt to infer the
#     format of the datetime strings in the columns, and if it can be inferred,
#     switch to a faster method of parsing them. In some cases this can increase
#     the parsing speed by 5-10x.
# keep_date_col : bool, default False
#     If True and `parse_dates` specifies combining multiple columns then
#     keep the original columns.
# date_parser : function, optional
#     Function to use for converting a sequence of string columns to an array of
#     datetime instances. The default uses ``dateutil.parser.parser`` to do the
#     conversion. Pandas will try to call `date_parser` in three different ways,
#     advancing to the next if an exception occurs: 1) Pass one or more arrays
#     (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
#     string values from the columns defined by `parse_dates` into a single array
#     and pass that; and 3) call `date_parser` once for each row using one or
#     more strings (corresponding to the columns defined by `parse_dates`) as
#     arguments.
# dayfirst : bool, default False
#     DD/MM format dates, international and European format.
# cache_dates : bool, default True
#     If True, use a cache of unique, converted dates to apply the datetime
#     conversion. May produce significant speed-up when parsing duplicate
#     date strings, especially ones with timezone offsets.
#
#     .. versionadded:: 0.25.0
# iterator : bool, default False
#     Return TextFileReader object for iteration or getting chunks with
#     ``get_chunk()``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# chunksize : int, optional
#     Return TextFileReader object for iteration.
#     See the `IO Tools docs
#     <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
#     for more information on ``iterator`` and ``chunksize``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# compression : str or dict, default 'infer'
#     For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
#     'zip', the ZIP file must contain only one data file to be read in. Set to
#     ``None`` for no decompression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for Zstandard decompression using a
#     custom compression dictionary:
#     ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
#
#     .. versionchanged:: 1.4.0 Zstandard support.
#
# thousands : str, optional
#     Thousands separator.
# decimal : str, default '.'
#     Character to recognize as decimal point (e.g. use ',' for European data).
# lineterminator : str (length 1), optional
#     Character to break file into lines. Only valid with C parser.
# quotechar : str (length 1), optional
#     The character used to denote the start and end of a quoted item. Quoted
#     items can include the delimiter and it will be ignored.
# quoting : int or csv.QUOTE_* instance, default 0
#     Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
#     QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
# doublequote : bool, default ``True``
#    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
#    whether or not to interpret two consecutive quotechar elements INSIDE a
#    field as a single ``quotechar`` element.
# escapechar : str (length 1), optional
#     One-character string used to escape other characters.
# comment : str, optional
#     Indicates remainder of line should not be parsed. If found at the beginning
#     of a line, the line will be ignored altogether. This parameter must be a
#     single character. Like empty lines (as long as ``skip_blank_lines=True``),
#     fully commented lines are ignored by the parameter `header` but not by
#     `skiprows`. For example, if ``comment='#'``, parsing
#     ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
#     treated as the header.
# encoding : str, optional
#     Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
#     standard encodings
#     <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
#
#     .. versionchanged:: 1.2
#
#        When ``encoding`` is ``None``, ``errors="replace"`` is passed to
#        ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
#        This behavior was previously only the case for ``engine="python"``.
#
#     .. versionchanged:: 1.3.0
#
#        ``encoding_errors`` is a new argument. ``encoding`` no longer has an
#        influence on how encoding errors are handled.
#
# encoding_errors : str, optional, default "strict"
#     How encoding errors are treated. `List of possible values
#     <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
#
#     .. versionadded:: 1.3.0
#
# dialect : str or csv.Dialect, optional
#     If provided, this parameter will override values (default or not) for the
#     following parameters: `delimiter`, `doublequote`, `escapechar`,
#     `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
#     override values, a ParserWarning will be issued. See csv.Dialect
#     documentation for more details.
# error_bad_lines : bool, optional, default ``None``
#     Lines with too many fields (e.g. a csv line with too many commas) will by
#     default cause an exception to be raised, and no DataFrame will be returned.
#     If False, then these "bad lines" will be dropped from the DataFrame that is
#     returned.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# warn_bad_lines : bool, optional, default ``None``
#     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
#     "bad line" will be output.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# on_bad_lines : {'error', 'warn', 'skip'} or callable, default 'error'
#     Specifies what to do upon encountering a bad line (a line with too many fields).
#     Allowed values are:
#
#         - 'error', raise an Exception when a bad line is encountered.
#         - 'warn', raise a warning when a bad line is encountered and skip that line.
#         - 'skip', skip bad lines without raising or warning when they are encountered.
#
#     .. versionadded:: 1.3.0
#
#         - callable, function with signature
#           ``(bad_line: list[str]) -> list[str] | None`` that will process a single
#           bad line. ``bad_line`` is a list of strings split by the ``sep``.
#           If the function returns ``None``, the bad line will be ignored.
#           If the function returns a new list of strings with more elements than
#           expected, a ``ParserWarning`` will be emitted while dropping extra elements.
#           Only supported when ``engine="python"``
#
#     .. versionadded:: 1.4.0
#
# delim_whitespace : bool, default False
#     Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
#     used as the sep. Equivalent to setting ``sep='\s+'``. If this option
#     is set to True, nothing should be passed in for the ``delimiter``
#     parameter.
# low_memory : bool, default True
#     Internally process the file in chunks, resulting in lower memory use
#     while parsing, but possibly mixed type inference.  To ensure no mixed
#     types either set False, or specify the type with the `dtype` parameter.
#     Note that the entire file is read into a single DataFrame regardless,
#     use the `chunksize` or `iterator` parameter to return the data in chunks.
#     (Only valid with C parser).
# memory_map : bool, default False
#     If a filepath is provided for `filepath_or_buffer`, map the file object
#     directly onto memory and access the data directly from there. Using this
#     option can improve performance because there is no longer any I/O overhead.
# float_precision : str, optional
#     Specifies which converter the C engine should use for floating-point
#     values. The options are ``None`` or 'high' for the ordinary converter,
#     'legacy' for the original lower precision pandas converter, and
#     'round_trip' for the round-trip converter.
#
#     .. versionchanged:: 1.2
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2
#
# Returns
# -------
# DataFrame or TextParser
#     A comma-separated values (csv) file is returned as two-dimensional
#     data structure with labeled axes.
#
# See Also
# --------
# DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_fwf : Read a table of fixed-width formatted lines into DataFrame.
#
# Examples
# --------
# >>> pd.read_csv('data.csv')  # doctest: +SKIP
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u></summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u></summary>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexs matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexs otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement, when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` do not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u></summary>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <h2 class='hglib'>numpy</h2>
# <ul>
# <li>
# <details><summary><u>numpy.array</u></summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#1'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% _uuid="8f2839f25d086af736a60e9eeb907d3b93b6e0e5" _cell_guid="b1076dfc-b9ad-4769-8c92-a6c4dae69d19"
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
# Load the Titanic training set and derive a numeric feature matrix
# (X_train) and label vector (Y_train) for the model built below.
train = pd.read_csv('/kaggle/input/titanic/train.csv')

# 'Sec_Name' is the second whitespace-separated token of the passenger name.
# NOTE(review): this is presumably meant to capture the title ('Mr.', 'Mrs.',
# ...); names whose second token is not one of the mapped titles are encoded
# as 0 below -- confirm that is acceptable.
train['Sec_Name'] = train['Name'].astype(str).str.split().str[1]
#print(train)
Y_train = np.array(train['Survived'])

# Work on an explicit copy so the column assignments below never write
# through to (or warn about) a slice of `train`.
X_train = train[['Pclass', 'Sex', 'Age', 'Embarked', 'Sec_Name']].copy()

# Encode sex as 0/1. Labels outside the mapping are left untouched so the
# float conversion at the end fails loudly instead of silently training on
# an unexpected category.
X_train['Sex'] = X_train['Sex'].replace({'male': 0, 'female': 1})

# Impute missing ages with the mean age. Only 'Age' is imputed this way: a
# frame-wide NaN replacement would also write the mean age into every other
# column that has missing values (e.g. a missing 'Embarked' would become a
# meaningless port code).
X_train['Age'] = X_train['Age'].fillna(X_train['Age'].mean())

# Encode the port of embarkation; missing or unrecognised ports become 0,
# the same "unknown" code used for 'Sec_Name'.
X_train['Embarked'] = X_train['Embarked'].map({'S': 1, 'C': 2, 'Q': 3}).fillna(0)

# Encode the four recognised titles; anything else becomes 0.
X_train['Sec_Name'] = (
    X_train['Sec_Name']
    .map({'Mr.': 1, 'Mrs.': 2, 'Miss.': 3, 'Master.': 4})
    .fillna(0)
)

# Force a float array: an object-dtype array cannot be converted to a tensor
# by Keras, and any leftover non-numeric value raises here rather than later.
X_train = np.array(X_train, dtype=float)
print(X_train)


# %%
class myCallback(tf.keras.callbacks.Callback):
    """Keras callback that stops training once accuracy exceeds 0.82."""

    def on_epoch_end(self, epoch, logs=None):
        """Request early stopping when the epoch's 'accuracy' is above 0.82.

        Args:
            epoch: Index of the epoch that just finished (unused).
            logs: Mapping of metric names to values for this epoch; may be
                None or may lack the 'accuracy' key.
        """
        # `logs=None` avoids a shared mutable default; a missing metric is
        # treated as "keep training" instead of raising on `None > 0.82`.
        accuracy = (logs or {}).get('accuracy')
        if accuracy is not None and accuracy > 0.82:
            self.model.stop_training = True


# Single callback instance for use during training.
callbacks = myCallback()


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>3. Model Building and Training</h1>  <a id='3'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>keras</h2>
# <ul>
# <li>
# <details><summary><u>keras.layers.core.dense.Dense</u></summary>
# <blockquote>
# <code>
# Just your regular densely-connected NN layer.
#
# `Dense` implements the operation:
# `output = activation(dot(input, kernel) + bias)`
# where `activation` is the element-wise activation function
# passed as the `activation` argument, `kernel` is a weights matrix
# created by the layer, and `bias` is a bias vector created by the layer
# (only applicable if `use_bias` is `True`). These are all attributes of
# `Dense`.
#
# Note: If the input to the layer has a rank greater than 2, then `Dense`
# computes the dot product between the `inputs` and the `kernel` along the
# last axis of the `inputs` and axis 0 of the `kernel` (using `tf.tensordot`).
# For example, if input has dimensions `(batch_size, d0, d1)`,
# then we create a `kernel` with shape `(d1, units)`, and the `kernel` operates
# along axis 2 of the `input`, on every sub-tensor of shape `(1, 1, d1)`
# (there are `batch_size * d0` such sub-tensors).
# The output in this case will have shape `(batch_size, d0, units)`.
#
# Besides, layer attributes cannot be modified after the layer has been called
# once (except the `trainable` attribute).
# When a popular kwarg `input_shape` is passed, then keras will create
# an input layer to insert before the current layer. This can be treated
# equivalent to explicitly defining an `InputLayer`.
#
# Example:
#
# >>> # Create a `Sequential` model and add a Dense layer as the first layer.
# >>> model = tf.keras.models.Sequential()
# >>> model.add(tf.keras.Input(shape=(16,)))
# >>> model.add(tf.keras.layers.Dense(32, activation='relu'))
# >>> # Now the model will take as input arrays of shape (None, 16)
# >>> # and output arrays of shape (None, 32).
# >>> # Note that after the first layer, you don't need to specify
# >>> # the size of the input anymore:
# >>> model.add(tf.keras.layers.Dense(32))
# >>> model.output_shape
# (None, 32)
#
# Args:
#   units: Positive integer, dimensionality of the output space.
#   activation: Activation function to use.
#     If you don't specify anything, no activation is applied
#     (ie. "linear" activation: `a(x) = x`).
#   use_bias: Boolean, whether the layer uses a bias vector.
#   kernel_initializer: Initializer for the `kernel` weights matrix.
#   bias_initializer: Initializer for the bias vector.
#   kernel_regularizer: Regularizer function applied to
#     the `kernel` weights matrix.
#   bias_regularizer: Regularizer function applied to the bias vector.
#   activity_regularizer: Regularizer function applied to
#     the output of the layer (its "activation").
#   kernel_constraint: Constraint function applied to
#     the `kernel` weights matrix.
#   bias_constraint: Constraint function applied to the bias vector.
#
# Input shape:
#   N-D tensor with shape: `(batch_size, ..., input_dim)`.
#   The most common situation would be
#   a 2D input with shape `(batch_size, input_dim)`.
#
# Output shape:
#   N-D tensor with shape: `(batch_size, ..., units)`.
#   For instance, for a 2D input with shape `(batch_size, input_dim)`,
#   the output would have shape `(batch_size, units)`.
#
# </code>
# <a href='#3'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.sequential.Sequential</u></summary>
# <blockquote>
# <code>
# `Sequential` groups a linear stack of layers into a `tf.keras.Model`.
#
# `Sequential` provides training and inference features on this model.
#
# Examples:
#
# ```python
# Optionally, the first layer can receive an `input_shape` argument:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# Afterwards, we do automatic shape inference:
# model.add(tf.keras.layers.Dense(4))
#
# This is identical to the following:
# model = tf.keras.Sequential()
# model.add(tf.keras.Input(shape=(16,)))
# model.add(tf.keras.layers.Dense(8))
#
# Note that you can also omit the `input_shape` argument.
# In that case the model doesn't have any weights until the first call
# to a training/evaluation method (since it isn't yet built):
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.weights not created yet
#
# Whereas if you specify the input shape, the model gets built
# continuously as you are adding layers:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8, input_shape=(16,)))
# model.add(tf.keras.layers.Dense(4))
# len(model.weights)
# Returns "4"
#
# When using the delayed-build pattern (no input shape specified), you can
# choose to manually build your model by calling
# `build(batch_input_shape)`:
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(4))
# model.build((None, 16))
# len(model.weights)
# Returns "4"
#
# Note that when using the delayed-build pattern (no input shape specified),
# the model gets built the first time you call `fit`, `eval`, or `predict`,
# or the first time you call the model on some input data.
# model = tf.keras.Sequential()
# model.add(tf.keras.layers.Dense(8))
# model.add(tf.keras.layers.Dense(1))
# model.compile(optimizer='sgd', loss='mse')
# This builds the model for the first time:
# model.fit(x, y, batch_size=32, epochs=10)
# ```
#
# </code>
# <a href='#3'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.compile</u></summary>
# <blockquote>
# <code>
# Configures the model for training.
#
# Example:
#
# ```python
# model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-3),
#               loss=tf.keras.losses.BinaryCrossentropy(),
#               metrics=[tf.keras.metrics.BinaryAccuracy(),
#                        tf.keras.metrics.FalseNegatives()])
# ```
#
# Args:
#     optimizer: String (name of optimizer) or optimizer instance. See
#       `tf.keras.optimizers`.
#     loss: Loss function. May be a string (name of loss function), or
#       a `tf.keras.losses.Loss` instance. See `tf.keras.losses`. A loss
#       function is any callable with the signature `loss = fn(y_true,
#       y_pred)`, where `y_true` are the ground truth values, and
#       `y_pred` are the model's predictions.
#       `y_true` should have shape
#       `(batch_size, d0, .. dN)` (except in the case of
#       sparse loss functions such as
#       sparse categorical crossentropy which expects integer arrays of shape
#       `(batch_size, d0, .. dN-1)`).
#       `y_pred` should have shape `(batch_size, d0, .. dN)`.
#       The loss function should return a float tensor.
#       If a custom `Loss` instance is
#       used and reduction is set to `None`, return value has shape
#       `(batch_size, d0, .. dN-1)` i.e. per-sample or per-timestep loss
#       values; otherwise, it is a scalar. If the model has multiple outputs,
#       you can use a different loss on each output by passing a dictionary
#       or a list of losses. The loss value that will be minimized by the
#       model will then be the sum of all individual losses, unless
#       `loss_weights` is specified.
#     metrics: List of metrics to be evaluated by the model during training
#       and testing. Each of these can be a string (name of a built-in
#       function), function or a `tf.keras.metrics.Metric` instance. See
#       `tf.keras.metrics`. Typically you will use `metrics=['accuracy']`. A
#       function is any callable with the signature `result = fn(y_true,
#       y_pred)`. To specify different metrics for different outputs of a
#       multi-output model, you could also pass a dictionary, such as
#       `metrics={'output_a': 'accuracy', 'output_b': ['accuracy', 'mse']}`.
#       You can also pass a list to specify a metric or a list of metrics
#       for each output, such as `metrics=[['accuracy'], ['accuracy', 'mse']]`
#       or `metrics=['accuracy', ['accuracy', 'mse']]`. When you pass the
#       strings 'accuracy' or 'acc', we convert this to one of
#       `tf.keras.metrics.BinaryAccuracy`,
#       `tf.keras.metrics.CategoricalAccuracy`,
#       `tf.keras.metrics.SparseCategoricalAccuracy` based on the loss
#       function used and the model output shape. We do a similar
#       conversion for the strings 'crossentropy' and 'ce' as well.
#     loss_weights: Optional list or dictionary specifying scalar coefficients
#       (Python floats) to weight the loss contributions of different model
#       outputs. The loss value that will be minimized by the model will then
#       be the *weighted sum* of all individual losses, weighted by the
#       `loss_weights` coefficients.
#         If a list, it is expected to have a 1:1 mapping to the model's
#           outputs. If a dict, it is expected to map output names (strings)
#           to scalar coefficients.
#     weighted_metrics: List of metrics to be evaluated and weighted by
#       `sample_weight` or `class_weight` during training and testing.
#     run_eagerly: Bool. Defaults to `False`. If `True`, this `Model`'s
#       logic will not be wrapped in a `tf.function`. Recommended to leave
#       this as `None` unless your `Model` cannot be run inside a
#       `tf.function`. `run_eagerly=True` is not supported when using
#       `tf.distribute.experimental.ParameterServerStrategy`.
#     steps_per_execution: Int. Defaults to 1. The number of batches to run
#       during each `tf.function` call. Running multiple batches inside a
#       single `tf.function` call can greatly improve performance on TPUs or
#       small models with a large Python overhead. At most, one full epoch
#       will be run each execution. If a number larger than the size of the
#       epoch is passed, the execution will be truncated to the size of the
#       epoch. Note that if `steps_per_execution` is set to `N`,
#       `Callback.on_batch_begin` and `Callback.on_batch_end` methods will
#       only be called every `N` batches (i.e. before/after each `tf.function`
#       execution).
#     jit_compile: If `True`, compile the model training step with XLA.
#       [XLA](https://www.tensorflow.org/xla) is an optimizing compiler for
#       machine learning.
#       `jit_compile` is not enabled by default.
#       This option cannot be enabled with `run_eagerly=True`.
#       Note that `jit_compile=True`
#       may not necessarily work for all models.
#       For more information on supported operations please refer to the
#       [XLA documentation](https://www.tensorflow.org/xla).
#       Also refer to
#       [known XLA issues](https://www.tensorflow.org/xla/known_issues) for
#       more details.
#     **kwargs: Arguments supported for backwards compatibility only.
#
# </code>
# <a href='#3'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>keras.engine.training.Model.fit</u></summary>
# <blockquote>
# <code>
# Trains the model for a fixed number of epochs (iterations on a dataset).
#
# Args:
#     x: Input data. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A dict mapping input names to the corresponding array/tensors,
#         if the model has named inputs.
#       - A `tf.data` dataset. Should return a tuple
#         of either `(inputs, targets)` or
#         `(inputs, targets, sample_weights)`.
#       - A generator or `keras.utils.Sequence` returning `(inputs, targets)`
#         or `(inputs, targets, sample_weights)`.
#       - A `tf.keras.utils.experimental.DatasetCreator`, which wraps a
#         callable that takes a single argument of type
#         `tf.distribute.InputContext`, and returns a `tf.data.Dataset`.
#         `DatasetCreator` should be used when users prefer to specify the
#         per-replica batching and sharding logic for the `Dataset`.
#         See `tf.keras.utils.experimental.DatasetCreator` doc for more
#         information.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given below. If using
#       `tf.distribute.experimental.ParameterServerStrategy`, only
#       `DatasetCreator` type is supported for `x`.
#     y: Target data. Like the input data `x`,
#       it could be either Numpy array(s) or TensorFlow tensor(s).
#       It should be consistent with `x` (you cannot have Numpy inputs and
#       tensor targets, or inversely). If `x` is a dataset, generator,
#       or `keras.utils.Sequence` instance, `y` should
#       not be specified (since targets will be obtained from `x`).
#     batch_size: Integer or `None`.
#         Number of samples per gradient update.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     epochs: Integer. Number of epochs to train the model.
#         An epoch is an iteration over the entire `x` and `y`
#         data provided
#         (unless the `steps_per_epoch` flag is set to
#         something other than None).
#         Note that in conjunction with `initial_epoch`,
#         `epochs` is to be understood as "final epoch".
#         The model is not trained for a number of iterations
#         given by `epochs`, but merely until the epoch
#         of index `epochs` is reached.
#     verbose: 'auto', 0, 1, or 2. Verbosity mode.
#         0 = silent, 1 = progress bar, 2 = one line per epoch.
#         'auto' defaults to 1 for most cases, but 2 when used with
#         `ParameterServerStrategy`. Note that the progress bar is not
#         particularly useful when logged to a file, so verbose=2 is
#         recommended when not running interactively (eg, in a production
#         environment).
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during training.
#         See `tf.keras.callbacks`. Note `tf.keras.callbacks.ProgbarLogger`
#         and `tf.keras.callbacks.History` callbacks are created automatically
#         and need not be passed into `model.fit`.
#         `tf.keras.callbacks.ProgbarLogger` is created or not based on
#         `verbose` argument to `model.fit`.
#         Callbacks with batch-level calls are currently unsupported with
#         `tf.distribute.experimental.ParameterServerStrategy`, and users are
#         advised to implement epoch-level calls instead with an appropriate
#         `steps_per_epoch` value.
#     validation_split: Float between 0 and 1.
#         Fraction of the training data to be used as validation data.
#         The model will set apart this fraction of the training data,
#         will not train on it, and will evaluate
#         the loss and any model metrics
#         on this data at the end of each epoch.
#         The validation data is selected from the last samples
#         in the `x` and `y` data provided, before shuffling. This argument is
#         not supported when `x` is a dataset, generator or
#        `keras.utils.Sequence` instance.
#         `validation_split` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     validation_data: Data on which to evaluate
#         the loss and any model metrics at the end of each epoch.
#         The model will not be trained on this data. Thus, note the fact
#         that the validation loss of data provided using `validation_split`
#         or `validation_data` is not affected by regularization layers like
#         noise and dropout.
#         `validation_data` will override `validation_split`.
#         `validation_data` could be:
#           - A tuple `(x_val, y_val)` of Numpy arrays or tensors.
#           - A tuple `(x_val, y_val, val_sample_weights)` of NumPy arrays.
#           - A `tf.data.Dataset`.
#           - A Python generator or `keras.utils.Sequence` returning
#           `(inputs, targets)` or `(inputs, targets, sample_weights)`.
#         `validation_data` is not yet supported with
#         `tf.distribute.experimental.ParameterServerStrategy`.
#     shuffle: Boolean (whether to shuffle the training data
#         before each epoch) or str (for 'batch'). This argument is ignored
#         when `x` is a generator or an object of tf.data.Dataset.
#         'batch' is a special option for dealing
#         with the limitations of HDF5 data; it shuffles in batch-sized
#         chunks. Has no effect when `steps_per_epoch` is not `None`.
#     class_weight: Optional dictionary mapping class indices (integers)
#         to a weight (float) value, used for weighting the loss function
#         (during training only).
#         This can be useful to tell the model to
#         "pay more attention" to samples from
#         an under-represented class.
#     sample_weight: Optional Numpy array of weights for
#         the training samples, used for weighting the loss function
#         (during training only). You can either pass a flat (1D)
#         Numpy array with the same length as the input samples
#         (1:1 mapping between weights and samples),
#         or in the case of temporal data,
#         you can pass a 2D array with shape
#         `(samples, sequence_length)`,
#         to apply a different weight to every timestep of every sample. This
#         argument is not supported when `x` is a dataset, generator, or
#         `keras.utils.Sequence` instance; instead, provide the sample_weights
#         as the third element of `x`.
#     initial_epoch: Integer.
#         Epoch at which to start training
#         (useful for resuming a previous training run).
#     steps_per_epoch: Integer or `None`.
#         Total number of steps (batches of samples)
#         before declaring one epoch finished and starting the
#         next epoch. When training with input tensors such as
#         TensorFlow data tensors, the default `None` is equal to
#         the number of samples in your dataset divided by
#         the batch size, or 1 if that cannot be determined. If x is a
#         `tf.data` dataset, and 'steps_per_epoch'
#         is None, the epoch will run until the input dataset is exhausted.
#         When passing an infinitely repeating dataset, you must specify the
#         `steps_per_epoch` argument. If `steps_per_epoch=-1` the training
#         will run indefinitely with an infinitely repeating dataset.
#         This argument is not supported with array inputs.
#         When using `tf.distribute.experimental.ParameterServerStrategy`:
#           * `steps_per_epoch=None` is not supported.
#     validation_steps: Only relevant if `validation_data` is provided and
#         is a `tf.data` dataset. Total number of steps (batches of
#         samples) to draw before stopping when performing validation
#         at the end of every epoch. If 'validation_steps' is None, validation
#         will run until the `validation_data` dataset is exhausted. In the
#         case of an infinitely repeated dataset, it will run into an
#         infinite loop. If 'validation_steps' is specified and only part of
#         the dataset will be consumed, the evaluation will start from the
#         beginning of the dataset at each epoch. This ensures that the same
#         validation samples are used every time.
#     validation_batch_size: Integer or `None`.
#         Number of samples per validation batch.
#         If unspecified, will default to `batch_size`.
#         Do not specify the `validation_batch_size` if your data is in the
#         form of datasets, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     validation_freq: Only relevant if validation data is provided. Integer
#         or `collections.abc.Container` instance (e.g. list, tuple, etc.).
#         If an integer, specifies how many training epochs to run before a
#         new validation run is performed, e.g. `validation_freq=2` runs
#         validation every 2 epochs. If a Container, specifies the epochs on
#         which to run validation, e.g. `validation_freq=[1, 2, 10]` runs
#         validation at the end of the 1st, 2nd, and 10th epochs.
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up
#         when using process-based threading. If unspecified, `workers`
#         will default to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# Unpacking behavior for iterator-like inputs:
#     A common pattern is to pass a tf.data.Dataset, generator, or
#   tf.keras.utils.Sequence to the `x` argument of fit, which will in fact
#   yield not only features (x) but optionally targets (y) and sample weights.
#   Keras requires that the output of such iterator-likes be unambiguous. The
#   iterator should return a tuple of length 1, 2, or 3, where the optional
#   second and third elements will be used for y and sample_weight
#   respectively. Any other type provided will be wrapped in a length one
#   tuple, effectively treating everything as 'x'. When yielding dicts, they
#   should still adhere to the top-level tuple structure.
#   e.g. `({"x0": x0, "x1": x1}, y)`. Keras will not attempt to separate
#   features, targets, and weights from the keys of a single dict.
#     A notable unsupported data type is the namedtuple. The reason is that
#   it behaves like both an ordered datatype (tuple) and a mapping
#   datatype (dict). So given a namedtuple of the form:
#       `namedtuple("example_tuple", ["y", "x"])`
#   it is ambiguous whether to reverse the order of the elements when
#   interpreting the value. Even worse is a tuple of the form:
#       `namedtuple("other_tuple", ["x", "y", "z"])`
#   where it is unclear if the tuple was intended to be unpacked into x, y,
#   and sample_weight or passed through as a single element to `x`. As a
#   result the data processing code will simply raise a ValueError if it
#   encounters a namedtuple. (Along with instructions to remedy the issue.)
#
# Returns:
#     A `History` object. Its `History.history` attribute is
#     a record of training loss values and metrics values
#     at successive epochs, as well as validation loss values
#     and validation metrics values (if applicable).
#
# Raises:
#     RuntimeError: 1. If the model was never compiled, or
#     2. If `model.fit` is wrapped in `tf.function`.
#
#     ValueError: In case of mismatch between the provided input data
#         and what the model expects or when the input data is empty.
#
# </code>
# <a href='#3'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %% _uuid="d629ff2d2480ee46fbb7e2d37f6b5fab8052498a" _cell_guid="79c7e3d0-c299-4dcb-8224-4455121ee9b0"
# Assemble a fully connected stack: 5 input features feed Dense layers of
# 5, 4, 3 and 2 ReLU units, ending in a single sigmoid output unit.
# NOTE(review): the first layer is reached through the bare `keras` name while
# every other layer goes through `tf.keras` -- confirm both names resolve to
# the same Keras package.
model = tf.keras.Sequential(
    [
        keras.layers.Dense(5, input_dim=5, activation=tf.nn.relu),
        # The three narrowing hidden layers differ only in their unit count.
        *(
            tf.keras.layers.Dense(width, activation=tf.nn.relu)
            for width in (4, 3, 2)
        ),
        tf.keras.layers.Dense(1, activation=tf.nn.sigmoid),
    ]
)

# Binary cross-entropy loss with the Adam optimizer; accuracy is reported
# alongside the loss.
model.compile(
    loss='binary_crossentropy',
    optimizer="Adam",
    metrics=['accuracy'],
)

# Train for 100 epochs in mini-batches of 5, with 15% of the training data
# set aside for validation. `callbacks` is defined elsewhere in the notebook
# (presumably a Keras callback instance -- verify) and is wrapped in a list
# as `fit` expects. The call is left as a bare expression so the notebook
# still displays the returned History object.
model.fit(
    X_train,
    Y_train,
    batch_size=5,
    epochs=100,
    validation_split=0.15,
    callbacks=[callbacks],
)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>4. Data Preparation | Feature Engineering | Model Building and Training</h1>  <a id='4'></a><small><a href='#top_phases'>back to top</a></small><details><summary><u>View function documentation</u></summary>
# <ul>
#
# <li> <h2 class='hglib'>pandas</h2>
# <ul>
# <li>
# <details><summary><u>pandas.io.parsers.readers.read_csv</u></summary>
# <blockquote>
# <code>
# Read a comma-separated values (csv) file into DataFrame.
#
# Also supports optionally iterating or breaking of the file
# into chunks.
#
# Additional help can be found in the online docs for
# `IO Tools <https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html>`_.
#
# Parameters
# ----------
# filepath_or_buffer : str, path object or file-like object
#     Any valid string path is acceptable. The string could be a URL. Valid
#     URL schemes include http, ftp, s3, gs, and file. For file URLs, a host is
#     expected. A local file could be: file://localhost/path/to/table.csv.
#
#     If you want to pass in a path object, pandas accepts any ``os.PathLike``.
#
#     By file-like object, we refer to objects with a ``read()`` method, such as
#     a file handle (e.g. via builtin ``open`` function) or ``StringIO``.
# sep : str, default ','
#     Delimiter to use. If sep is None, the C engine cannot automatically detect
#     the separator, but the Python parsing engine can, meaning the latter will
#     be used and automatically detect the separator by Python's builtin sniffer
#     tool, ``csv.Sniffer``. In addition, separators longer than 1 character and
#     different from ``'\s+'`` will be interpreted as regular expressions and
#     will also force the use of the Python parsing engine. Note that regex
#     delimiters are prone to ignoring quoted data. Regex example: ``'\r\t'``.
# delimiter : str, default ``None``
#     Alias for sep.
# header : int, list of int, None, default 'infer'
#     Row number(s) to use as the column names, and the start of the
#     data.  Default behavior is to infer the column names: if no names
#     are passed the behavior is identical to ``header=0`` and column
#     names are inferred from the first line of the file, if column
#     names are passed explicitly then the behavior is identical to
#     ``header=None``. Explicitly pass ``header=0`` to be able to
#     replace existing names. The header can be a list of integers that
#     specify row locations for a multi-index on the columns
#     e.g. [0,1,3]. Intervening rows that are not specified will be
#     skipped (e.g. 2 in this example is skipped). Note that this
#     parameter ignores commented lines and empty lines if
#     ``skip_blank_lines=True``, so ``header=0`` denotes the first line of
#     data rather than the first line of the file.
# names : array-like, optional
#     List of column names to use. If the file contains a header row,
#     then you should explicitly pass ``header=0`` to override the column names.
#     Duplicates in this list are not allowed.
# index_col : int, str, sequence of int / str, or False, optional, default ``None``
#   Column(s) to use as the row labels of the ``DataFrame``, either given as
#   string name or column index. If a sequence of int / str is given, a
#   MultiIndex is used.
#
#   Note: ``index_col=False`` can be used to force pandas to *not* use the first
#   column as the index, e.g. when you have a malformed file with delimiters at
#   the end of each line.
# usecols : list-like or callable, optional
#     Return a subset of the columns. If list-like, all elements must either
#     be positional (i.e. integer indices into the document columns) or strings
#     that correspond to column names provided either by the user in `names` or
#     inferred from the document header row(s). If ``names`` are given, the document
#     header row(s) are not taken into account. For example, a valid list-like
#     `usecols` parameter would be ``[0, 1, 2]`` or ``['foo', 'bar', 'baz']``.
#     Element order is ignored, so ``usecols=[0, 1]`` is the same as ``[1, 0]``.
#     To instantiate a DataFrame from ``data`` with element order preserved use
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['foo', 'bar']]`` for columns
#     in ``['foo', 'bar']`` order or
#     ``pd.read_csv(data, usecols=['foo', 'bar'])[['bar', 'foo']]``
#     for ``['bar', 'foo']`` order.
#
#     If callable, the callable function will be evaluated against the column
#     names, returning names where the callable function evaluates to True. An
#     example of a valid callable argument would be ``lambda x: x.upper() in
#     ['AAA', 'BBB', 'DDD']``. Using this parameter results in much faster
#     parsing time and lower memory usage.
# squeeze : bool, default False
#     If the parsed data only contains one column then return a Series.
#
#     .. deprecated:: 1.4.0
#         Append ``.squeeze("columns")`` to the call to ``read_csv`` to squeeze
#         the data.
# prefix : str, optional
#     Prefix to add to column numbers when no header, e.g. 'X' for X0, X1, ...
#
#     .. deprecated:: 1.4.0
#        Use a list comprehension on the DataFrame's columns after calling ``read_csv``.
# mangle_dupe_cols : bool, default True
#     Duplicate columns will be specified as 'X', 'X.1', ...'X.N', rather than
#     'X'...'X'. Passing in False will cause data to be overwritten if there
#     are duplicate names in the columns.
# dtype : Type name or dict of column -> type, optional
#     Data type for data or columns. E.g. {'a': np.float64, 'b': np.int32,
#     'c': 'Int64'}
#     Use `str` or `object` together with suitable `na_values` settings
#     to preserve and not interpret dtype.
#     If converters are specified, they will be applied INSTEAD
#     of dtype conversion.
# engine : {'c', 'python', 'pyarrow'}, optional
#     Parser engine to use. The C and pyarrow engines are faster, while the python engine
#     is currently more feature-complete. Multithreading is currently only supported by
#     the pyarrow engine.
#
#     .. versionadded:: 1.4.0
#
#         The "pyarrow" engine was added as an *experimental* engine, and some features
#         are unsupported, or may not work correctly, with this engine.
# converters : dict, optional
#     Dict of functions for converting values in certain columns. Keys can either
#     be integers or column labels.
# true_values : list, optional
#     Values to consider as True.
# false_values : list, optional
#     Values to consider as False.
# skipinitialspace : bool, default False
#     Skip spaces after delimiter.
# skiprows : list-like, int or callable, optional
#     Line numbers to skip (0-indexed) or number of lines to skip (int)
#     at the start of the file.
#
#     If callable, the callable function will be evaluated against the row
#     indices, returning True if the row should be skipped and False otherwise.
#     An example of a valid callable argument would be ``lambda x: x in [0, 2]``.
# skipfooter : int, default 0
#     Number of lines at bottom of file to skip (Unsupported with engine='c').
# nrows : int, optional
#     Number of rows of file to read. Useful for reading pieces of large files.
# na_values : scalar, str, list-like, or dict, optional
#     Additional strings to recognize as NA/NaN. If dict passed, specific
#     per-column NA values.  By default the following values are interpreted as
#     NaN: '', '#N/A', '#N/A N/A', '#NA', '-1.#IND', '-1.#QNAN', '-NaN', '-nan',
#     '1.#IND', '1.#QNAN', '<NA>', 'N/A', 'NA', 'NULL', 'NaN', 'n/a',
#     'nan', 'null'.
# keep_default_na : bool, default True
#     Whether or not to include the default NaN values when parsing the data.
#     Depending on whether `na_values` is passed in, the behavior is as follows:
#
#     * If `keep_default_na` is True, and `na_values` are specified, `na_values`
#       is appended to the default NaN values used for parsing.
#     * If `keep_default_na` is True, and `na_values` are not specified, only
#       the default NaN values are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are specified, only
#       the NaN values specified `na_values` are used for parsing.
#     * If `keep_default_na` is False, and `na_values` are not specified, no
#       strings will be parsed as NaN.
#
#     Note that if `na_filter` is passed in as False, the `keep_default_na` and
#     `na_values` parameters will be ignored.
# na_filter : bool, default True
#     Detect missing value markers (empty strings and the value of na_values). In
#     data without any NAs, passing na_filter=False can improve the performance
#     of reading a large file.
# verbose : bool, default False
#     Indicate number of NA values placed in non-numeric columns.
# skip_blank_lines : bool, default True
#     If True, skip over blank lines rather than interpreting as NaN values.
# parse_dates : bool or list of int or names or list of lists or dict, default False
#     The behavior is as follows:
#
#     * boolean. If True -> try parsing the index.
#     * list of int or names. e.g. If [1, 2, 3] -> try parsing columns 1, 2, 3
#       each as a separate date column.
#     * list of lists. e.g.  If [[1, 3]] -> combine columns 1 and 3 and parse as
#       a single date column.
#     * dict, e.g. {'foo' : [1, 3]} -> parse columns 1, 3 as date and call
#       result 'foo'
#
#     If a column or index cannot be represented as an array of datetimes,
#     say because of an unparsable value or a mixture of timezones, the column
#     or index will be returned unaltered as an object data type. For
#     non-standard datetime parsing, use ``pd.to_datetime`` after
#     ``pd.read_csv``. To parse an index or column with a mixture of timezones,
#     specify ``date_parser`` to be a partially-applied
#     :func:`pandas.to_datetime` with ``utc=True``. See
#     :ref:`io.csv.mixed_timezones` for more.
#
#     Note: A fast-path exists for iso8601-formatted dates.
# infer_datetime_format : bool, default False
#     If True and `parse_dates` is enabled, pandas will attempt to infer the
#     format of the datetime strings in the columns, and if it can be inferred,
#     switch to a faster method of parsing them. In some cases this can increase
#     the parsing speed by 5-10x.
# keep_date_col : bool, default False
#     If True and `parse_dates` specifies combining multiple columns then
#     keep the original columns.
# date_parser : function, optional
#     Function to use for converting a sequence of string columns to an array of
#     datetime instances. The default uses ``dateutil.parser.parser`` to do the
#     conversion. Pandas will try to call `date_parser` in three different ways,
#     advancing to the next if an exception occurs: 1) Pass one or more arrays
#     (as defined by `parse_dates`) as arguments; 2) concatenate (row-wise) the
#     string values from the columns defined by `parse_dates` into a single array
#     and pass that; and 3) call `date_parser` once for each row using one or
#     more strings (corresponding to the columns defined by `parse_dates`) as
#     arguments.
# dayfirst : bool, default False
#     DD/MM format dates, international and European format.
# cache_dates : bool, default True
#     If True, use a cache of unique, converted dates to apply the datetime
#     conversion. May produce significant speed-up when parsing duplicate
#     date strings, especially ones with timezone offsets.
#
#     .. versionadded:: 0.25.0
# iterator : bool, default False
#     Return TextFileReader object for iteration or getting chunks with
#     ``get_chunk()``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# chunksize : int, optional
#     Return TextFileReader object for iteration.
#     See the `IO Tools docs
#     <https://pandas.pydata.org/pandas-docs/stable/io.html#io-chunking>`_
#     for more information on ``iterator`` and ``chunksize``.
#
#     .. versionchanged:: 1.2
#
#        ``TextFileReader`` is a context manager.
# compression : str or dict, default 'infer'
#     For on-the-fly decompression of on-disk data. If 'infer' and 'filepath_or_buffer' is
#     path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). If using
#     'zip', the ZIP file must contain only one data file to be read in. Set to
#     ``None`` for no decompression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdDecompressor``, respectively. As an
#     example, the following could be passed for Zstandard decompression using a
#     custom compression dictionary:
#     ``compression={'method': 'zstd', 'dict_data': my_compression_dict}``.
#
#     .. versionchanged:: 1.4.0 Zstandard support.
#
# thousands : str, optional
#     Thousands separator.
# decimal : str, default '.'
#     Character to recognize as decimal point (e.g. use ',' for European data).
# lineterminator : str (length 1), optional
#     Character to break file into lines. Only valid with C parser.
# quotechar : str (length 1), optional
#     The character used to denote the start and end of a quoted item. Quoted
#     items can include the delimiter and it will be ignored.
# quoting : int or csv.QUOTE_* instance, default 0
#     Control field quoting behavior per ``csv.QUOTE_*`` constants. Use one of
#     QUOTE_MINIMAL (0), QUOTE_ALL (1), QUOTE_NONNUMERIC (2) or QUOTE_NONE (3).
# doublequote : bool, default ``True``
#    When quotechar is specified and quoting is not ``QUOTE_NONE``, indicate
#    whether or not to interpret two consecutive quotechar elements INSIDE a
#    field as a single ``quotechar`` element.
# escapechar : str (length 1), optional
#     One-character string used to escape other characters.
# comment : str, optional
#     Indicates remainder of line should not be parsed. If found at the beginning
#     of a line, the line will be ignored altogether. This parameter must be a
#     single character. Like empty lines (as long as ``skip_blank_lines=True``),
#     fully commented lines are ignored by the parameter `header` but not by
#     `skiprows`. For example, if ``comment='#'``, parsing
#     ``#empty\na,b,c\n1,2,3`` with ``header=0`` will result in 'a,b,c' being
#     treated as the header.
# encoding : str, optional
#     Encoding to use for UTF when reading/writing (ex. 'utf-8'). `List of Python
#     standard encodings
#     <https://docs.python.org/3/library/codecs.html#standard-encodings>`_ .
#
#     .. versionchanged:: 1.2
#
#        When ``encoding`` is ``None``, ``errors="replace"`` is passed to
#        ``open()``. Otherwise, ``errors="strict"`` is passed to ``open()``.
#        This behavior was previously only the case for ``engine="python"``.
#
#     .. versionchanged:: 1.3.0
#
#        ``encoding_errors`` is a new argument. ``encoding`` no longer has an
#        influence on how encoding errors are handled.
#
# encoding_errors : str, optional, default "strict"
#     How encoding errors are treated. `List of possible values
#     <https://docs.python.org/3/library/codecs.html#error-handlers>`_ .
#
#     .. versionadded:: 1.3.0
#
# dialect : str or csv.Dialect, optional
#     If provided, this parameter will override values (default or not) for the
#     following parameters: `delimiter`, `doublequote`, `escapechar`,
#     `skipinitialspace`, `quotechar`, and `quoting`. If it is necessary to
#     override values, a ParserWarning will be issued. See csv.Dialect
#     documentation for more details.
# error_bad_lines : bool, optional, default ``None``
#     Lines with too many fields (e.g. a csv line with too many commas) will by
#     default cause an exception to be raised, and no DataFrame will be returned.
#     If False, then these "bad lines" will be dropped from the DataFrame that is
#     returned.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# warn_bad_lines : bool, optional, default ``None``
#     If error_bad_lines is False, and warn_bad_lines is True, a warning for each
#     "bad line" will be output.
#
#     .. deprecated:: 1.3.0
#        The ``on_bad_lines`` parameter should be used instead to specify behavior upon
#        encountering a bad line.
# on_bad_lines : {'error', 'warn', 'skip'} or callable, default 'error'
#     Specifies what to do upon encountering a bad line (a line with too many fields).
#     Allowed values are :
#
#         - 'error', raise an Exception when a bad line is encountered.
#         - 'warn', raise a warning when a bad line is encountered and skip that line.
#         - 'skip', skip bad lines without raising or warning when they are encountered.
#
#     .. versionadded:: 1.3.0
#
#         - callable, function with signature
#           ``(bad_line: list[str]) -> list[str] | None`` that will process a single
#           bad line. ``bad_line`` is a list of strings split by the ``sep``.
#           If the function returns ``None``, the bad line will be ignored.
#           If the function returns a new list of strings with more elements than
#           expected, a ``ParserWarning`` will be emitted while dropping extra elements.
#           Only supported when ``engine="python"``
#
#     .. versionadded:: 1.4.0
#
# delim_whitespace : bool, default False
#     Specifies whether or not whitespace (e.g. ``' '`` or ``'    '``) will be
#     used as the sep. Equivalent to setting ``sep='\s+'``. If this option
#     is set to True, nothing should be passed in for the ``delimiter``
#     parameter.
# low_memory : bool, default True
#     Internally process the file in chunks, resulting in lower memory use
#     while parsing, but possibly mixed type inference.  To ensure no mixed
#     types either set False, or specify the type with the `dtype` parameter.
#     Note that the entire file is read into a single DataFrame regardless,
#     use the `chunksize` or `iterator` parameter to return the data in chunks.
#     (Only valid with C parser).
# memory_map : bool, default False
#     If a filepath is provided for `filepath_or_buffer`, map the file object
#     directly onto memory and access the data directly from there. Using this
#     option can improve performance because there is no longer any I/O overhead.
# float_precision : str, optional
#     Specifies which converter the C engine should use for floating-point
#     values. The options are ``None`` or 'high' for the ordinary converter,
#     'legacy' for the original lower precision pandas converter, and
#     'round_trip' for the round-trip converter.
#
#     .. versionchanged:: 1.2
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2
#
# Returns
# -------
# DataFrame or TextParser
#     A comma-separated values (csv) file is returned as two-dimensional
#     data structure with labeled axes.
#
# See Also
# --------
# DataFrame.to_csv : Write DataFrame to a comma-separated values (csv) file.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_fwf : Read a table of fixed-width formatted lines into DataFrame.
#
# Examples
# --------
# >>> pd.read_csv('data.csv')  # doctest: +SKIP
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.astype</u></summary>
# <blockquote>
# <code>
# Cast a pandas object to a specified dtype ``dtype``.
#
# Parameters
# ----------
# dtype : data type, or dict of column name -> data type
#     Use a numpy.dtype or Python type to cast entire pandas object to
#     the same type. Alternatively, use {col: dtype, ...}, where col is a
#     column label and dtype is a numpy.dtype or Python type to cast one
#     or more of the DataFrame's columns to column-specific types.
# copy : bool, default True
#     Return a copy when ``copy=True`` (be very careful setting
#     ``copy=False`` as changes to values then may propagate to other
#     pandas objects).
# errors : {'raise', 'ignore'}, default 'raise'
#     Control raising of exceptions on invalid data for provided dtype.
#
#     - ``raise`` : allow exceptions to be raised
#     - ``ignore`` : suppress exceptions. On error return original object.
#
# Returns
# -------
# casted : same type as caller
#
# See Also
# --------
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# to_numeric : Convert argument to a numeric type.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
#
# Notes
# -----
# .. deprecated:: 1.3.0
#
#     Using ``astype`` to convert from timezone-naive dtype to
#     timezone-aware dtype is deprecated and will raise in a
#     future version.  Use :meth:`Series.dt.tz_localize` instead.
#
# Examples
# --------
# Create a DataFrame:
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# Cast all columns to int32:
#
# >>> df.astype('int32').dtypes
# col1    int32
# col2    int32
# dtype: object
#
# Cast col1 to int32 using a dictionary:
#
# >>> df.astype({'col1': 'int32'}).dtypes
# col1    int32
# col2    int64
# dtype: object
#
# Create a series:
#
# >>> ser = pd.Series([1, 2], dtype='int32')
# >>> ser
# 0    1
# 1    2
# dtype: int32
# >>> ser.astype('int64')
# 0    1
# 1    2
# dtype: int64
#
# Convert to categorical type:
#
# >>> ser.astype('category')
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [1, 2]
#
# Convert to ordered categorical type with custom ordering:
#
# >>> from pandas.api.types import CategoricalDtype
# >>> cat_dtype = CategoricalDtype(
# ...     categories=[2, 1], ordered=True)
# >>> ser.astype(cat_dtype)
# 0    1
# 1    2
# dtype: category
# Categories (2, int64): [2 < 1]
#
# Note that using ``copy=False`` and changing data on a new
# pandas object may propagate changes:
#
# >>> s1 = pd.Series([1, 2])
# >>> s2 = s1.astype('int64', copy=False)
# >>> s2[0] = 10
# >>> s1  # note that s1[0] has changed too
# 0    10
# 1     2
# dtype: int64
#
# Create a series of dates:
#
# >>> ser_date = pd.Series(pd.date_range('20200101', periods=3))
# >>> ser_date
# 0   2020-01-01
# 1   2020-01-02
# 2   2020-01-03
# dtype: datetime64[ns]
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame.replace</u></summary>
# <blockquote>
# <code>
# Replace values given in `to_replace` with `value`.
#
# Values of the DataFrame are replaced with other values dynamically.
#
# This differs from updating with ``.loc`` or ``.iloc``, which require
# you to specify a location to update with some value.
#
# Parameters
# ----------
# to_replace : str, regex, list, dict, Series, int, float, or None
#     How to find the values that will be replaced.
#
#     * numeric, str or regex:
#
#         - numeric: numeric values equal to `to_replace` will be
#           replaced with `value`
#         - str: string exactly matching `to_replace` will be replaced
#           with `value`
#         - regex: regexs matching `to_replace` will be replaced with
#           `value`
#
#     * list of str, regex, or numeric:
#
#         - First, if `to_replace` and `value` are both lists, they
#           **must** be the same length.
#         - Second, if ``regex=True`` then all of the strings in **both**
#           lists will be interpreted as regexs otherwise they will match
#           directly. This doesn't matter much for `value` since there
#           are only a few possible substitution regexes you can use.
#         - str, regex and numeric rules apply as above.
#
#     * dict:
#
#         - Dicts can be used to specify different replacement values
#           for different existing values. For example,
#           ``{'a': 'b', 'y': 'z'}`` replaces the value 'a' with 'b' and
#           'y' with 'z'. To use a dict in this way the `value`
#           parameter should be `None`.
#         - For a DataFrame a dict can specify that different values
#           should be replaced in different columns. For example,
#           ``{'a': 1, 'b': 'z'}`` looks for the value 1 in column 'a'
#           and the value 'z' in column 'b' and replaces these values
#           with whatever is specified in `value`. The `value` parameter
#           should not be ``None`` in this case. You can treat this as a
#           special case of passing two lists except that you are
#           specifying the column to search in.
#         - For a DataFrame nested dictionaries, e.g.,
#           ``{'a': {'b': np.nan}}``, are read as follows: look in column
#           'a' for the value 'b' and replace it with NaN. The `value`
#           parameter should be ``None`` to use a nested dict in this
#           way. You can nest regular expressions as well. Note that
#           column names (the top-level dictionary keys in a nested
#           dictionary) **cannot** be regular expressions.
#
#     * None:
#
#         - This means that the `regex` argument must be a string,
#           compiled regular expression, or list, dict, ndarray or
#           Series of such elements. If `value` is also ``None`` then
#           this **must** be a nested dictionary or Series.
#
#     See the examples section for examples of each of these.
# value : scalar, dict, list, str, regex, default None
#     Value to replace any values matching `to_replace` with.
#     For a DataFrame a dict of values can be used to specify which
#     value to use for each column (columns not in the dict will not be
#     filled). Regular expressions, strings and lists or dicts of such
#     objects are also allowed.
#
# inplace : bool, default False
#     If True, performs operation inplace and returns None.
# limit : int, default None
#     Maximum size gap to forward or backward fill.
# regex : bool or same types as `to_replace`, default False
#     Whether to interpret `to_replace` and/or `value` as regular
#     expressions. If this is ``True`` then `to_replace` *must* be a
#     string. Alternatively, this could be a regular expression or a
#     list, dict, or array of regular expressions in which case
#     `to_replace` must be ``None``.
# method : {'pad', 'ffill', 'bfill', `None`}
#     The method to use for replacement, when `to_replace` is a
#     scalar, list or tuple and `value` is ``None``.
#
#     .. versionchanged:: 0.23.0
#         Added to DataFrame.
#
# Returns
# -------
# DataFrame
#     Object after replacement.
#
# Raises
# ------
# AssertionError
#     * If `regex` is not a ``bool`` and `to_replace` is not
#       ``None``.
#
# TypeError
#     * If `to_replace` is not a scalar, array-like, ``dict``, or ``None``
#     * If `to_replace` is a ``dict`` and `value` is not a ``list``,
#       ``dict``, ``ndarray``, or ``Series``
#     * If `to_replace` is ``None`` and `regex` is not compilable
#       into a regular expression or is a list, dict, ndarray, or
#       Series.
#     * When replacing multiple ``bool`` or ``datetime64`` objects and
#       the arguments to `to_replace` do not match the type of the
#       value being replaced
#
# ValueError
#     * If a ``list`` or an ``ndarray`` is passed to `to_replace` and
#       `value` but they are not the same length.
#
# See Also
# --------
# DataFrame.fillna : Fill NA values.
# DataFrame.where : Replace values based on boolean condition.
# Series.str.replace : Simple string replacement.
#
# Notes
# -----
# * Regex substitution is performed under the hood with ``re.sub``. The
#   rules for substitution for ``re.sub`` are the same.
# * Regular expressions will only substitute on strings, meaning you
#   cannot provide, for example, a regular expression matching floating
#   point numbers and expect the columns in your frame that have a
#   numeric dtype to be matched. However, if those floating point
#   numbers *are* strings, then you can do this.
# * This method has *a lot* of options. You are encouraged to experiment
#   and play with this method to gain intuition about how it works.
# * When dict is used as the `to_replace` value, it is like
#   key(s) in the dict are the to_replace part and
#   value(s) in the dict are the value parameter.
#
# Examples
# --------
#
# **Scalar `to_replace` and `value`**
#
# >>> s = pd.Series([1, 2, 3, 4, 5])
# >>> s.replace(1, 5)
# 0    5
# 1    2
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# >>> df = pd.DataFrame({'A': [0, 1, 2, 3, 4],
# ...                    'B': [5, 6, 7, 8, 9],
# ...                    'C': ['a', 'b', 'c', 'd', 'e']})
# >>> df.replace(0, 5)
#     A  B  C
# 0  5  5  a
# 1  1  6  b
# 2  2  7  c
# 3  3  8  d
# 4  4  9  e
#
# **List-like `to_replace`**
#
# >>> df.replace([0, 1, 2, 3], 4)
#     A  B  C
# 0  4  5  a
# 1  4  6  b
# 2  4  7  c
# 3  4  8  d
# 4  4  9  e
#
# >>> df.replace([0, 1, 2, 3], [4, 3, 2, 1])
#     A  B  C
# 0  4  5  a
# 1  3  6  b
# 2  2  7  c
# 3  1  8  d
# 4  4  9  e
#
# >>> s.replace([1, 2], method='bfill')
# 0    3
# 1    3
# 2    3
# 3    4
# 4    5
# dtype: int64
#
# **dict-like `to_replace`**
#
# >>> df.replace({0: 10, 1: 100})
#         A  B  C
# 0   10  5  a
# 1  100  6  b
# 2    2  7  c
# 3    3  8  d
# 4    4  9  e
#
# >>> df.replace({'A': 0, 'B': 5}, 100)
#         A    B  C
# 0  100  100  a
# 1    1    6  b
# 2    2    7  c
# 3    3    8  d
# 4    4    9  e
#
# >>> df.replace({'A': {0: 100, 4: 400}})
#         A  B  C
# 0  100  5  a
# 1    1  6  b
# 2    2  7  c
# 3    3  8  d
# 4  400  9  e
#
# **Regular expression `to_replace`**
#
# >>> df = pd.DataFrame({'A': ['bat', 'foo', 'bait'],
# ...                    'B': ['abc', 'bar', 'xyz']})
# >>> df.replace(to_replace=r'^ba.$', value='new', regex=True)
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace({'A': r'^ba.$'}, {'A': 'new'}, regex=True)
#         A    B
# 0   new  abc
# 1   foo  bar
# 2  bait  xyz
#
# >>> df.replace(regex=r'^ba.$', value='new')
#         A    B
# 0   new  abc
# 1   foo  new
# 2  bait  xyz
#
# >>> df.replace(regex={r'^ba.$': 'new', 'foo': 'xyz'})
#         A    B
# 0   new  abc
# 1   xyz  new
# 2  bait  xyz
#
# >>> df.replace(regex=[r'^ba.$', 'foo'], value='new')
#         A    B
# 0   new  abc
# 1   new  new
# 2  bait  xyz
#
# Compare the behavior of ``s.replace({'a': None})`` and
# ``s.replace('a', None)`` to understand the peculiarities
# of the `to_replace` parameter:
#
# >>> s = pd.Series([10, 'a', 'a', 'b', 'a'])
#
# When one uses a dict as the `to_replace` value, it is like the
# value(s) in the dict are equal to the `value` parameter.
# ``s.replace({'a': None})`` is equivalent to
# ``s.replace(to_replace={'a': None}, value=None, method=None)``:
#
# >>> s.replace({'a': None})
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
# When ``value`` is not explicitly passed and `to_replace` is a scalar, list
# or tuple, `replace` uses the method parameter (default 'pad') to do the
# replacement. So this is why the 'a' values are being replaced by 10
# in rows 1 and 2 and 'b' in row 4 in this case.
#
# >>> s.replace('a')
# 0    10
# 1    10
# 2    10
# 3     b
# 4     b
# dtype: object
#
# On the other hand, if ``None`` is explicitly passed for ``value``, it will
# be respected:
#
# >>> s.replace('a', None)
# 0      10
# 1    None
# 2    None
# 3       b
# 4    None
# dtype: object
#
#     .. versionchanged:: 1.4.0
#         Previously the explicit ``None`` was silently ignored.
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.tools.numeric.to_numeric</u></summary>
# <blockquote>
# <code>
# Convert argument to a numeric type.
#
# The default return dtype is `float64` or `int64`
# depending on the data supplied. Use the `downcast` parameter
# to obtain other dtypes.
#
# Please note that precision loss may occur if really large numbers
# are passed in. Due to the internal limitations of `ndarray`, if
# numbers smaller than `-9223372036854775808` (np.iinfo(np.int64).min)
# or larger than `18446744073709551615` (np.iinfo(np.uint64).max) are
# passed in, it is very likely they will be converted to float so that
# they can be stored in an `ndarray`. These warnings apply similarly to
# `Series` since it internally leverages `ndarray`.
#
# Parameters
# ----------
# arg : scalar, list, tuple, 1-d array, or Series
#     Argument to be converted.
# errors : {'ignore', 'raise', 'coerce'}, default 'raise'
#     - If 'raise', then invalid parsing will raise an exception.
#     - If 'coerce', then invalid parsing will be set as NaN.
#     - If 'ignore', then invalid parsing will return the input.
# downcast : str, default None
#     Can be 'integer', 'signed', 'unsigned', or 'float'.
#     If not None, and if the data has been successfully cast to a
#     numerical dtype (or if the data was numeric to begin with),
#     downcast that resulting data to the smallest numerical dtype
#     possible according to the following rules:
#
#     - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
#     - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
#     - 'float': smallest float dtype (min.: np.float32)
#
#     As this behaviour is separate from the core conversion to
#     numeric values, any errors raised during the downcasting
#     will be surfaced regardless of the value of the 'errors' input.
#
#     In addition, downcasting will only occur if the size
#     of the resulting data's dtype is strictly larger than
#     the dtype it is to be cast to, so if none of the dtypes
#     checked satisfy that specification, no downcasting will be
#     performed on the data.
#
# Returns
# -------
# ret
#     Numeric if parsing succeeded.
#     Return type depends on input.  Series if Series, otherwise ndarray.
#
# See Also
# --------
# DataFrame.astype : Cast argument to a specified dtype.
# to_datetime : Convert argument to datetime.
# to_timedelta : Convert argument to timedelta.
# numpy.ndarray.astype : Cast a numpy array to a specified type.
# DataFrame.convert_dtypes : Convert dtypes.
#
# Examples
# --------
# Take separate series and convert to numeric, coercing when told to
#
# >>> s = pd.Series(['1.0', '2', -3])
# >>> pd.to_numeric(s)
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float64
# >>> pd.to_numeric(s, downcast='float')
# 0    1.0
# 1    2.0
# 2   -3.0
# dtype: float32
# >>> pd.to_numeric(s, downcast='signed')
# 0    1
# 1    2
# 2   -3
# dtype: int8
# >>> s = pd.Series(['apple', '1.0', '2', -3])
# >>> pd.to_numeric(s, errors='ignore')
# 0    apple
# 1      1.0
# 2        2
# 3       -3
# dtype: object
# >>> pd.to_numeric(s, errors='coerce')
# 0    NaN
# 1    1.0
# 2    2.0
# 3   -3.0
# dtype: float64
#
# Downcasting of nullable integer and floating dtypes is supported:
#
# >>> s = pd.Series([1, 2, 3], dtype="Int64")
# >>> pd.to_numeric(s, downcast="integer")
# 0    1
# 1    2
# 2    3
# dtype: Int8
# >>> s = pd.Series([1.0, 2.1, 3.0], dtype="Float64")
# >>> pd.to_numeric(s, downcast="float")
# 0    1.0
# 1    2.1
# 2    3.0
# dtype: Float32
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.frame.DataFrame</u></summary>
# <blockquote>
# <code>
# Two-dimensional, size-mutable, potentially heterogeneous tabular data.
#
# Data structure also contains labeled axes (rows and columns).
# Arithmetic operations align on both row and column labels. Can be
# thought of as a dict-like container for Series objects. The primary
# pandas data structure.
#
# Parameters
# ----------
# data : ndarray (structured or homogeneous), Iterable, dict, or DataFrame
#     Dict can contain Series, arrays, constants, dataclass or list-like objects. If
#     data is a dict, column order follows insertion-order. If a dict contains Series
#     which have an index defined, it is aligned by its index.
#
#     .. versionchanged:: 0.25.0
#        If data is a list of dicts, column order follows insertion-order.
#
# index : Index or array-like
#     Index to use for resulting frame. Will default to RangeIndex if
#     no indexing information is part of the input data and no index is provided.
# columns : Index or array-like
#     Column labels to use for resulting frame when data does not have them,
#     defaulting to RangeIndex(0, 1, 2, ..., n). If data contains column labels,
#     will perform column selection instead.
# dtype : dtype, default None
#     Data type to force. Only a single dtype is allowed. If None, infer.
# copy : bool or None, default None
#     Copy data from inputs.
#     For dict data, the default of None behaves like ``copy=True``.  For DataFrame
#     or 2d ndarray input, the default of None behaves like ``copy=False``.
#
#     .. versionchanged:: 1.3.0
#
# See Also
# --------
# DataFrame.from_records : Constructor from tuples, also record arrays.
# DataFrame.from_dict : From dicts of Series, arrays, or dicts.
# read_csv : Read a comma-separated values (csv) file into DataFrame.
# read_table : Read general delimited file into DataFrame.
# read_clipboard : Read text from clipboard into DataFrame.
#
# Examples
# --------
# Constructing DataFrame from a dictionary.
#
# >>> d = {'col1': [1, 2], 'col2': [3, 4]}
# >>> df = pd.DataFrame(data=d)
# >>> df
#    col1  col2
# 0     1     3
# 1     2     4
#
# Notice that the inferred dtype is int64.
#
# >>> df.dtypes
# col1    int64
# col2    int64
# dtype: object
#
# To enforce a single dtype:
#
# >>> df = pd.DataFrame(data=d, dtype=np.int8)
# >>> df.dtypes
# col1    int8
# col2    int8
# dtype: object
#
# Constructing DataFrame from a dictionary including Series:
#
# >>> d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2, 3], index=[2, 3])}
# >>> pd.DataFrame(data=d, index=[0, 1, 2, 3])
#    col1  col2
# 0     0   NaN
# 1     1   NaN
# 2     2   2.0
# 3     3   3.0
#
# Constructing DataFrame from numpy ndarray:
#
# >>> df2 = pd.DataFrame(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]),
# ...                    columns=['a', 'b', 'c'])
# >>> df2
#    a  b  c
# 0  1  2  3
# 1  4  5  6
# 2  7  8  9
#
# Constructing DataFrame from a numpy ndarray that has labeled columns:
#
# >>> data = np.array([(1, 2, 3), (4, 5, 6), (7, 8, 9)],
# ...                 dtype=[("a", "i4"), ("b", "i4"), ("c", "i4")])
# >>> df3 = pd.DataFrame(data, columns=['c', 'a'])
# ...
# >>> df3
#    c  a
# 0  3  1
# 1  6  4
# 2  9  7
#
# Constructing DataFrame from dataclass:
#
# >>> from dataclasses import make_dataclass
# >>> Point = make_dataclass("Point", [("x", int), ("y", int)])
# >>> pd.DataFrame([Point(0, 0), Point(0, 3), Point(2, 3)])
#    x  y
# 0  0  0
# 1  0  3
# 2  2  3
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>pandas.core.generic.NDFrame.to_csv</u></summary>
# <blockquote>
# <code>
# Write object to a comma-separated values (csv) file.
#
# Parameters
# ----------
# path_or_buf : str, path object, file-like object, or None, default None
#     String, path object (implementing os.PathLike[str]), or file-like
#     object implementing a write() function. If None, the result is
#     returned as a string. If a non-binary file object is passed, it should
#     be opened with `newline=''`, disabling universal newlines. If a binary
#     file object is passed, `mode` might need to contain a `'b'`.
#
#     .. versionchanged:: 1.2.0
#
#        Support for binary file objects was introduced.
#
# sep : str, default ','
#     String of length 1. Field delimiter for the output file.
# na_rep : str, default ''
#     Missing data representation.
# float_format : str, default None
#     Format string for floating point numbers.
# columns : sequence, optional
#     Columns to write.
# header : bool or list of str, default True
#     Write out the column names. If a list of strings is given it is
#     assumed to be aliases for the column names.
# index : bool, default True
#     Write row names (index).
# index_label : str or sequence, or False, default None
#     Column label for index column(s) if desired. If None is given, and
#     `header` and `index` are True, then the index names are used. A
#     sequence should be given if the object uses MultiIndex. If
#     False do not print fields for index names. Use index_label=False
#     for easier importing in R.
# mode : str
#     Python write mode, default 'w'.
# encoding : str, optional
#     A string representing the encoding to use in the output file,
#     defaults to 'utf-8'. `encoding` is not supported if `path_or_buf`
#     is a non-binary file object.
# compression : str or dict, default 'infer'
#     For on-the-fly compression of the output data. If 'infer' and
#     'path_or_buf' is path-like, then detect compression from the following extensions: '.gz',
#     '.bz2', '.zip', '.xz', or '.zst' (otherwise no compression). Set to
#     ``None`` for no compression. Can also be a dict with key ``'method'`` set
#     to one of {``'zip'``, ``'gzip'``, ``'bz2'``, ``'zstd'``} and other
#     key-value pairs are forwarded to ``zipfile.ZipFile``, ``gzip.GzipFile``,
#     ``bz2.BZ2File``, or ``zstandard.ZstdCompressor``, respectively. As an
#     example, the following could be passed for faster compression and to create
#     a reproducible gzip archive:
#     ``compression={'method': 'gzip', 'compresslevel': 1, 'mtime': 1}``.
#
#     .. versionchanged:: 1.0.0
#
#        May now be a dict with key 'method' as compression mode
#        and other entries as additional compression options if
#        compression mode is 'zip'.
#
#     .. versionchanged:: 1.1.0
#
#        Passing compression options as keys in dict is
#        supported for compression modes 'gzip', 'bz2', 'zstd', and 'zip'.
#
#     .. versionchanged:: 1.2.0
#
#         Compression is supported for binary file objects.
#
#     .. versionchanged:: 1.2.0
#
#         Previous versions forwarded dict entries for 'gzip' to
#         `gzip.open` instead of `gzip.GzipFile` which prevented
#         setting `mtime`.
#
# quoting : optional constant from csv module
#     Defaults to csv.QUOTE_MINIMAL. If you have set a `float_format`
#     then floats are converted to strings and thus csv.QUOTE_NONNUMERIC
#     will treat them as non-numeric.
# quotechar : str, default '\"'
#     String of length 1. Character used to quote fields.
# line_terminator : str, optional
#     The newline character or character sequence to use in the output
#     file. Defaults to `os.linesep`, which depends on the OS in which
#     this method is called ('\\n' for linux, '\\r\\n' for Windows, i.e.).
# chunksize : int or None
#     Rows to write at a time.
# date_format : str, default None
#     Format string for datetime objects.
# doublequote : bool, default True
#     Control quoting of `quotechar` inside a field.
# escapechar : str, default None
#     String of length 1. Character used to escape `sep` and `quotechar`
#     when appropriate.
# decimal : str, default '.'
#     Character recognized as decimal separator. E.g. use ',' for
#     European data.
# errors : str, default 'strict'
#     Specifies how encoding and decoding errors are to be handled.
#     See the errors argument for :func:`open` for a full list
#     of options.
#
#     .. versionadded:: 1.1.0
#
# storage_options : dict, optional
#     Extra options that make sense for a particular storage connection, e.g.
#     host, port, username, password, etc. For HTTP(S) URLs the key-value pairs
#     are forwarded to ``urllib`` as header options. For other URLs (e.g.
#     starting with "s3://", and "gcs://") the key-value pairs are forwarded to
#     ``fsspec``. Please see ``fsspec`` and ``urllib`` for more details.
#
#     .. versionadded:: 1.2.0
#
# Returns
# -------
# None or str
#     If path_or_buf is None, returns the resulting csv format as a
#     string. Otherwise returns None.
#
# See Also
# --------
# read_csv : Load a CSV file into a DataFrame.
# to_excel : Write DataFrame to an Excel file.
#
# Examples
# --------
# >>> df = pd.DataFrame({'name': ['Raphael', 'Donatello'],
# ...                    'mask': ['red', 'purple'],
# ...                    'weapon': ['sai', 'bo staff']})
# >>> df.to_csv(index=False)
# 'name,mask,weapon\nRaphael,red,sai\nDonatello,purple,bo staff\n'
#
# Create 'out.zip' containing 'out.csv'
#
# >>> compression_opts = dict(method='zip',
# ...                         archive_name='out.csv')  # doctest: +SKIP
# >>> df.to_csv('out.zip', index=False,
# ...           compression=compression_opts)  # doctest: +SKIP
#
# To write a csv file to a new folder or nested folder you will first
# need to create it using either Pathlib or os:
#
# >>> from pathlib import Path  # doctest: +SKIP
# >>> filepath = Path('folder/subfolder/out.csv')  # doctest: +SKIP
# >>> filepath.parent.mkdir(parents=True, exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv(filepath)  # doctest: +SKIP
#
# >>> import os  # doctest: +SKIP
# >>> os.makedirs('folder/subfolder', exist_ok=True)  # doctest: +SKIP
# >>> df.to_csv('folder/subfolder/out.csv')  # doctest: +SKIP
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <h2 class='hglib'>numpy</h2>
# <ul>
# <li>
# <details><summary><u>numpy.array</u></summary>
# <blockquote>
# <code>
# array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0,
#       like=None)
#
# Create an array.
#
# Parameters
# ----------
# object : array_like
#     An array, any object exposing the array interface, an object whose
#     __array__ method returns an array, or any (nested) sequence.
#     If object is a scalar, a 0-dimensional array containing object is
#     returned.
# dtype : data-type, optional
#     The desired data-type for the array.  If not given, then the type will
#     be determined as the minimum type required to hold the objects in the
#     sequence.
# copy : bool, optional
#     If true (default), then the object is copied.  Otherwise, a copy will
#     only be made if __array__ returns a copy, if obj is a nested sequence,
#     or if a copy is needed to satisfy any of the other requirements
#     (`dtype`, `order`, etc.).
# order : {'K', 'A', 'C', 'F'}, optional
#     Specify the memory layout of the array. If object is not an array, the
#     newly created array will be in C order (row major) unless 'F' is
#     specified, in which case it will be in Fortran order (column major).
#     If object is an array the following holds.
#
#     ===== ========= ===================================================
#     order  no copy                     copy=True
#     ===== ========= ===================================================
#     'K'   unchanged F & C order preserved, otherwise most similar order
#     'A'   unchanged F order if input is F and not C, otherwise C order
#     'C'   C order   C order
#     'F'   F order   F order
#     ===== ========= ===================================================
#
#     When ``copy=False`` and a copy is made for other reasons, the result is
#     the same as if ``copy=True``, with some exceptions for 'A', see the
#     Notes section. The default order is 'K'.
# subok : bool, optional
#     If True, then sub-classes will be passed-through, otherwise
#     the returned array will be forced to be a base-class array (default).
# ndmin : int, optional
#     Specifies the minimum number of dimensions that the resulting
#     array should have.  Ones will be pre-pended to the shape as
#     needed to meet this requirement.
# like : array_like
#     Reference object to allow the creation of arrays which are not
#     NumPy arrays. If an array-like passed in as ``like`` supports
#     the ``__array_function__`` protocol, the result will be defined
#     by it. In this case, it ensures the creation of an array object
#     compatible with that passed in via this argument.
#
#     .. versionadded:: 1.20.0
#
# Returns
# -------
# out : ndarray
#     An array object satisfying the specified requirements.
#
# See Also
# --------
# empty_like : Return an empty array with shape and type of input.
# ones_like : Return an array of ones with shape and type of input.
# zeros_like : Return an array of zeros with shape and type of input.
# full_like : Return a new array with shape of input filled with value.
# empty : Return a new uninitialized array.
# ones : Return a new array setting values to one.
# zeros : Return a new array setting values to zero.
# full : Return a new array of given shape filled with value.
#
#
# Notes
# -----
# When order is 'A' and `object` is an array in neither 'C' nor 'F' order,
# and a copy is forced by a change in dtype, then the order of the result is
# not necessarily 'C' as expected. This is likely a bug.
#
# Examples
# --------
# >>> np.array([1, 2, 3])
# array([1, 2, 3])
#
# Upcasting:
#
# >>> np.array([1, 2, 3.0])
# array([ 1.,  2.,  3.])
#
# More than one dimension:
#
# >>> np.array([[1, 2], [3, 4]])
# array([[1, 2],
#        [3, 4]])
#
# Minimum dimensions 2:
#
# >>> np.array([1, 2, 3], ndmin=2)
# array([[1, 2, 3]])
#
# Type provided:
#
# >>> np.array([1, 2, 3], dtype=complex)
# array([ 1.+0.j,  2.+0.j,  3.+0.j])
#
# Data-type consisting of more than one element:
#
# >>> x = np.array([(1,2),(3,4)],dtype=[('a','<i4'),('b','<i4')])
# >>> x['a']
# array([1, 3])
#
# Creating an array from sub-classes:
#
# >>> np.array(np.mat('1 2; 3 4'))
# array([[1, 2],
#        [3, 4]])
#
# >>> np.array(np.mat('1 2; 3 4'), subok=True)
# matrix([[1, 2],
#         [3, 4]])
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# <li>
# <details><summary><u>numpy.core._multiarray_umath.where</u></summary>
# <blockquote>
# <code>
# where(condition, [x, y], /)
#
# Return elements chosen from `x` or `y` depending on `condition`.
#
# .. note::
#     When only `condition` is provided, this function is a shorthand for
#     ``np.asarray(condition).nonzero()``. Using `nonzero` directly should be
#     preferred, as it behaves correctly for subclasses. The rest of this
#     documentation covers only the case where all three arguments are
#     provided.
#
# Parameters
# ----------
# condition : array_like, bool
#     Where True, yield `x`, otherwise yield `y`.
# x, y : array_like
#     Values from which to choose. `x`, `y` and `condition` need to be
#     broadcastable to some shape.
#
# Returns
# -------
# out : ndarray
#     An array with elements from `x` where `condition` is True, and elements
#     from `y` elsewhere.
#
# See Also
# --------
# choose
# nonzero : The function that is called when x and y are omitted
#
# Notes
# -----
# If all the arrays are 1-D, `where` is equivalent to::
#
#     [xv if c else yv
#      for c, xv, yv in zip(condition, x, y)]
#
# Examples
# --------
# >>> a = np.arange(10)
# >>> a
# array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
# >>> np.where(a < 5, a, 10*a)
# array([ 0,  1,  2,  3,  4, 50, 60, 70, 80, 90])
#
# This can be used on multidimensional arrays too:
#
# >>> np.where([[True, False], [True, True]],
# ...          [[1, 2], [3, 4]],
# ...          [[9, 8], [7, 6]])
# array([[1, 8],
#        [3, 4]])
#
# The shapes of x, y, and the condition are broadcast together:
#
# >>> x, y = np.ogrid[:3, :4]
# >>> np.where(x < y, x, 10 + y)  # both x and 10+y are broadcast
# array([[10,  0,  0,  0],
#        [10, 11,  1,  1],
#        [10, 11, 12,  2]])
#
# >>> a = np.array([[0, 1, 2],
# ...               [0, 2, 4],
# ...               [0, 3, 6]])
# >>> np.where(a < 4, a, -1)  # -1 is broadcast
# array([[ 0,  1,  2],
#        [ 0,  2, -1],
#        [ 0,  3, -1]])
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
# <li> <h2 class='hglib'>keras</h2>
# <ul>
# <li>
# <details><summary><u>keras.engine.training.Model.predict</u></summary>
# <blockquote>
# <code>
# Generates output predictions for the input samples.
#
# Computation is done in batches. This method is designed for batch processing
# of large numbers of inputs. It is not intended for use inside of loops
# that iterate over your data and process small numbers of inputs at a time.
#
# For small numbers of inputs that fit in one batch,
# directly use `__call__()` for faster execution, e.g.,
# `model(x)`, or `model(x, training=False)` if you have layers such as
# `tf.keras.layers.BatchNormalization` that behave differently during
# inference. You may pair the individual model call with a `tf.function`
# for additional performance inside your inner loop.
# If you need access to numpy array values instead of tensors after your
# model call, you can use `tensor.numpy()` to get the numpy array value of
# an eager tensor.
#
# Also, note the fact that test loss is not affected by
# regularization layers like noise and dropout.
#
# Note: See [this FAQ entry](
# https://keras.io/getting_started/faq/#whats-the-difference-between-model-methods-predict-and-call)
# for more details about the difference between `Model` methods `predict()`
# and `__call__()`.
#
# Args:
#     x: Input samples. It could be:
#       - A Numpy array (or array-like), or a list of arrays
#         (in case the model has multiple inputs).
#       - A TensorFlow tensor, or a list of tensors
#         (in case the model has multiple inputs).
#       - A `tf.data` dataset.
#       - A generator or `keras.utils.Sequence` instance.
#       A more detailed description of unpacking behavior for iterator types
#       (Dataset, generator, Sequence) is given in the `Unpacking behavior
#       for iterator-like inputs` section of `Model.fit`.
#     batch_size: Integer or `None`.
#         Number of samples per batch.
#         If unspecified, `batch_size` will default to 32.
#         Do not specify the `batch_size` if your data is in the
#         form of dataset, generators, or `keras.utils.Sequence` instances
#         (since they generate batches).
#     verbose: Verbosity mode, 0 or 1.
#     steps: Total number of steps (batches of samples)
#         before declaring the prediction round finished.
#         Ignored with the default value of `None`. If x is a `tf.data`
#         dataset and `steps` is None, `predict()` will
#         run until the input dataset is exhausted.
#     callbacks: List of `keras.callbacks.Callback` instances.
#         List of callbacks to apply during prediction.
#         See [callbacks](/api_docs/python/tf/keras/callbacks).
#     max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
#         input only. Maximum size for the generator queue.
#         If unspecified, `max_queue_size` will default to 10.
#     workers: Integer. Used for generator or `keras.utils.Sequence` input
#         only. Maximum number of processes to spin up when using
#         process-based threading. If unspecified, `workers` will default
#         to 1.
#     use_multiprocessing: Boolean. Used for generator or
#         `keras.utils.Sequence` input only. If `True`, use process-based
#         threading. If unspecified, `use_multiprocessing` will default to
#         `False`. Note that because this implementation relies on
#         multiprocessing, you should not pass non-picklable arguments to
#         the generator as they can't be passed easily to children processes.
#
# See the discussion of `Unpacking behavior for iterator-like inputs` for
# `Model.fit`. Note that Model.predict uses the same interpretation rules as
# `Model.fit` and `Model.evaluate`, so inputs must be unambiguous for all
# three methods.
#
# Returns:
#     Numpy array(s) of predictions.
#
# Raises:
#     RuntimeError: If `model.predict` is wrapped in a `tf.function`.
#     ValueError: In case of mismatch between the provided
#         input data and the model's expectations,
#         or in case a stateful model receives a number of samples
#         that is not a multiple of the batch size.
#
# </code>
# <a href='#4'>back to header</a>
# </blockquote>
# </details>
# </li>
# </ul>
# </li>
#
# </ul>
# </details>

# %%
# Build the Kaggle submission: load the Titanic test set, encode the selected
# feature columns as numbers, run `model` on them and write the thresholded
# predictions to 'submission4.csv'.
test = pd.read_csv('/kaggle/input/titanic/test.csv')
# Second whitespace-separated token of each name; the mapping further down
# looks for tokens such as 'Mr.' or 'Miss.' in this column.
test['Sec_Name'] = test['Name'].astype(str).str.split().str[1]
X_test = test[['Pclass', 'Sex', 'Age', 'Embarked', 'Sec_Name']]
# Frame-wide replacement: applies to every selected column, not only 'Sex'.
X_test = X_test.replace({'male': 0, 'female': 1})
# NOTE(review): this fills NaN in *all* selected columns with the mean age,
# not just in 'Age' -- presumably mirrors the training preprocessing; confirm.
X_test = X_test.replace(np.nan, X_test['Age'].mean())
X_test['Embarked'] = X_test['Embarked'].replace({'S': 1, 'C': 2, 'Q': 3})
X_test['Sec_Name'] = X_test['Sec_Name'].replace(
    {'Mr.': 1, 'Mrs.': 2, 'Miss.': 3, 'Master.': 4})
# Values that still cannot be parsed as numbers are coerced to NaN, then set to 0.
X_test['Sec_Name'] = pd.to_numeric(X_test['Sec_Name'], errors='coerce')
X_test['Sec_Name'] = X_test['Sec_Name'].replace(np.nan, 0)
X_test = np.array(X_test)
p = model.predict(X_test)
# Threshold the model output at 0.5 to obtain 0/1 labels.
p = np.where(p >= 0.5, 1, 0)
#model.evaluate(x_test, y_test)
#np.savetxt("test_ans4.csv", p, delimiter=",")
df_sub = pd.DataFrame()
df_sub['PassengerId'] = test['PassengerId']
# `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so use the builtin directly.
df_sub['Survived'] = p.astype(int)

df_sub.to_csv('submission4.csv', index=False)

# %%
