# ---
# jupyter:
#   jupytext:
#     text_representation:
#       extension: .py
#       format_name: percent
#       format_version: '1.3'
#       jupytext_version: 1.14.1
#   kernelspec:
#     display_name: Python 3
#     language: python
#     name: python3
# ---

# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# # Index of ML Operations<a id='top_phases'></a>
# <ul>
# <ul><li><details><summary><h2>Imported Libraries</h2></summary>
# <ul>
#
# <li><b>numpy</b></li>
# <li><b>os</b></li>
# <li><b>pandas</b></li>
# <li><b>tensorflow</b></li>
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Visualization</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Data Preparation</span></h2></summary>
# <ul>
#
# None
#
# </ul>
# </details></li>
# <ul><li><details><summary><h4><s>Data Profiling and Exploratory Data Analysis</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Data Cleaning Filtering</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Data Sub-sampling and Train-test Splitting</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Feature Engineering</span></h2></summary>
# <ul>
#
# None
#
# </ul>
# </details></li>
# <ul><li><details><summary><h4><s>Feature Transformation</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Feature Selection</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <li><details><summary><h2><span style='color:#42a5f5'>Model Building and Training</span></h2></summary>
# <ul>
#
# None
#
# </ul>
# </details></li>
# <ul><li><details><summary><h4><s>Model Training</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Model Parameter Tuning</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# <ul><li><details><summary><h4><s>Model Validation and Assembling</s> (no calls found)</h4></summary>
# <ul>
#
# None
#
# </ul>
# </details></li></ul>
# </ul>
# <hr>


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>1. Library Loading</h1>  <a id='1'></a><small><a href='#top_phases'>back to top</a></small>

# %% _uuid="8f2839f25d086af736a60e9eeb907d3b93b6e0e5" _cell_guid="b1076dfc-b9ad-4769-8c92-a6c4dae69d19"
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input tree and print the full path of every file found.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>2. Data Preparation</h1>  <a id='2'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Read the sample submission file and the training sales table (sales_train.csv).
sample = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/sample_submission.csv'
)
df = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/sales_train.csv'
)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>3. Data Preparation</h1>  <a id='3'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the training sales frame.
df.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>4. Data Preparation</h1>  <a id='4'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the sample submission frame.
sample.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>5. Data Preparation</h1>  <a id='5'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Read the three lookup tables shipped with the competition data:
# items.csv, item_categories.csv and shops.csv.
items = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/items.csv'
)
items_category = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/item_categories.csv'
)
shops = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/shops.csv'
)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>6. Data Preparation</h1>  <a id='6'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Preview the first rows of the items table.
items.head()

# %%
# (row count, column count) of the items table.
items.shape

# %%
# (row count, column count) of the item-categories table.
items_category.shape


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>9. Data Preparation</h1>  <a id='9'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the items table.
items.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>10. Data Preparation | Feature Engineering</h1>  <a id='10'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Remove the item_name column from the items table; the label only needs to
# be listed once.
items = items.drop(columns=['item_name'])


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>11. Data Preparation</h1>  <a id='11'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the item-categories table.
items_category.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>12. Data Preparation</h1>  <a id='12'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Preview the first rows of the shops table.
shops.head()

# %%
# (row count, column count) of the shops table.
shops.shape

# %%
# Category id for every sales row, fetched with one vectorised label lookup
# instead of one Python-level Series access per row.
# NOTE(review): as before, each item_id is used as an index *label* of
# `items`, so this assumes the items index labels coincide with item_id
# (true for the default RangeIndex only if rows are ordered by item_id) -
# confirm. Unknown labels still raise KeyError.
category = items['item_category_id'].loc[df['item_id'].to_numpy()].tolist()

# %%
# Peek at the first twenty looked-up category ids.
print(category[:20])

# %%
# Row of the items table at position 22154.
items.iloc[22154]

# %%
# Store the looked-up category ids as a new column on the sales frame.
df['item_category_id'] = category


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>18. Data Preparation</h1>  <a id='18'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the sales frame, now including item_category_id.
df.head(n=5)

# %%
# Keep only rows whose item_cnt_day is greater than -3 and at most 50.
# The explicit .copy() makes `data` an independent frame: it is modified in
# place further down (column drops and a new column), which on a bare filtered
# slice of `df` triggers pandas' SettingWithCopyWarning.
count_in_range = (df['item_cnt_day'] <= 50) & (df['item_cnt_day'] > -3)
data = df[count_in_range].copy()

# %%
# Size of the filtered frame ...
data.shape

# %%
# ... compared with the size of the unfiltered frame.
df.shape

# %%
# Number of distinct item_cnt_day values left after filtering
# (dropna=False so that a missing value would be counted as well).
data['item_cnt_day'].nunique(dropna=False)

# %%
# Copy of the filtered frame without the target, the item id and the date.
data_train = data.drop(['item_cnt_day', 'item_id', 'date'], axis=1)

# %%
# The item_cnt_day column on its own.
target = data.loc[:, 'item_cnt_day']

# %%
# Frequency of each distinct item_cnt_day value.
print(target.value_counts())

# %%
# temp = data.value_counts()
# import matplotlib.pyplot as plt
# plt.plot(temp[:])

# %%
# (row count, column count) of the filtered frame.
data.shape

# %%
# First five rows of the filtered frame.
data.head(n=5)

# %%
# Remove the date and item_id columns from `data` in place.
data.drop(['date', 'item_id'], axis=1, inplace=True)

# %%
# date_block_num reduced modulo 12, computed in one vectorised operation
# rather than a Python loop over every row.
# NOTE(review): presumably a month-within-year index if date_block_num counts
# consecutive months - confirm against the data description.
date_block = data['date_block_num'] % 12


# %%
# Same index as `data`, so the assignment aligns row for row.
data['date_block_engineered'] = date_block

# %%
# Distinct values of the engineered column.
pd.unique(data['date_block_engineered'])

# %%
# The raw date_block_num column is no longer needed; remove it in place.
data.drop('date_block_num', axis=1, inplace=True)

# %%
# First five rows after the column changes.
data.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>36. Library Loading</h1>  <a id='36'></a><small><a href='#top_phases'>back to top</a></small>

# %%
import tensorflow.keras as keras

# %%
# Regression target: the item_cnt_day column of the filtered frame.
labels = data.loc[:, 'item_cnt_day']

# %%
# Number of distinct target values (missing values, if any, counted too).
labels.nunique(dropna=False)

# %%
# labels = pd.get_dummies(labels)

# %%
# First five target values.
labels.head(n=5)

# %%
# Length of the target series as a one-element tuple.
labels.shape

# %%
# First five rows of the frame the target was taken from.
data.head(n=5)

# %%
# Remove the target column so that `data` holds candidate features only.
data = data.drop('item_cnt_day', axis=1)

# %%
# Shape of the feature frame ...
data.shape

# %%
# ... and of the target series.
labels.shape

# %%
# Transposed target series.
label = labels.T

# %%
# Reset Keras' global state so re-running this cell starts from a fresh graph.
keras.backend.clear_session()

# Small regression network: 2 input features -> 3 ReLU units -> 1 output.
model = keras.models.Sequential([
    keras.layers.Dense(3, input_dim=2, activation='relu'),
    # Linear output for regression. A ReLU here cannot emit negative values,
    # although the target filter in this notebook keeps item_cnt_day values
    # greater than -3, and a ReLU output unit can get stuck at a constant 0.
    keras.layers.Dense(1, activation='linear'),
])

# Stop once the monitored validation loss has not improved for 5 epochs and
# roll the model back to the best epoch, so that the in-memory model used for
# prediction matches the best checkpoint rather than the last epoch.
early = keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)

# Persist only the best model seen so far to model.h5.
model_check = keras.callbacks.ModelCheckpoint('model.h5', save_best_only=True)

# Mean absolute error is reported alongside the MSE loss; classification
# accuracy carries no meaning for a continuous target.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])


# %%
# First five rows of the candidate feature frame.
data.head(n=5)

# %%
# Model inputs: everything in `data` except date_block_engineered and item_price.
data_x = data.drop(['date_block_engineered', 'item_price'], axis=1)

# %%
# First five rows of the model inputs.
data_x.head(n=5)

# %%
# Train for at most 500 epochs in mini-batches of 64, holding out 20% of the
# rows for validation; the two callbacks handle early stopping and checkpointing.
model.fit(
    x=data_x,
    y=labels,
    batch_size=64,
    epochs=500,
    validation_split=0.2,
    callbacks=[early, model_check],
)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>52. Data Preparation</h1>  <a id='52'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Read the competition's test set.
test = pd.read_csv('../input/competitive-data-science-predict-future-sales/test.csv')

# %%
# Category id for every test row, fetched with one vectorised label lookup
# instead of one Python-level Series access per row.
# NOTE(review): each item_id is used as an index *label* of `items`, so this
# assumes the items index labels coincide with item_id - confirm. Unknown
# labels raise KeyError, as the per-row lookup did.
item_category = items['item_category_id'].loc[test['item_id'].to_numpy()].tolist()

# %%
# Store the looked-up category ids as a new column on the test frame.
test['item_category_id'] = item_category


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>55. Data Preparation</h1>  <a id='55'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Show the first five rows of the test frame, now including item_category_id.
test.head(n=5)

# %%
# df_sorted = df.sort_values(['item_id'])

# %%
# df_sorted.head()


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>58. Data Preparation | Feature Engineering</h1>  <a id='58'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Remove the ID and item_id columns so only the model's input columns remain.
test = test.drop(['ID', 'item_id'], axis=1)

# %%
# Run the trained network on the remaining test columns.
predictions = model.predict(x=test)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>60. Data Preparation</h1>  <a id='60'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Wrap the raw model output in a DataFrame (columns get default integer labels).
predictions = pd.DataFrame(data=predictions)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>61. Data Preparation</h1>  <a id='61'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Preview the raw model output.
predictions.head()

# %%
# Round every prediction in column 0 to the nearest integer in one vectorised
# step (ties go to the even neighbour, like the built-in round()).
pred = predictions[0].round().astype('int64')

# %%
# `pred` shares the index of `predictions`, so this aligns row for row.
predictions['item_cnt_day'] = pred


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>64. Data Preparation</h1>  <a id='64'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# First five rows: raw output in column 0 next to the rounded item_cnt_day.
predictions.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>65. Data Preparation</h1>  <a id='65'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Re-read the test set: its ID column was dropped from `test` before predicting.
test = pd.read_csv(
    '../input/competitive-data-science-predict-future-sales/test.csv'
)

# %%
# Attach the ID column of the freshly read test set to the predictions.
predictions['ID'] = test.loc[:, 'ID']


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>67. Data Preparation</h1>  <a id='67'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# First five rows of the predictions, now carrying the ID column.
predictions.head(n=5)


# %% [markdown] deletable=false editable=false run_control={"frozen": true}
# <h1 class='hg'>68. Data Preparation | Feature Engineering</h1>  <a id='68'></a><small><a href='#top_phases'>back to top</a></small>

# %%
# Discard the raw model output stored under column label 0.
predictions = predictions.drop(0, axis=1)

# %%
# First five rows of what will be written out.
predictions.head(n=5)

# %%
# First file: columns as they are currently named, without the row index.
predictions.to_csv(path_or_buf='Submit_1.csv', index=False)

# %%
# Relabel the two columns, in order, as item_cnt_month and ID.
predictions = predictions.set_axis(['item_cnt_month', 'ID'], axis='columns')

# %%
# Second file: same values under the new column names.
predictions.to_csv(path_or_buf='Submit_2.csv', index=False)

# %%
