This notebook was designed for the Bento activity challenge recognition competition, with the aim of providing basic knowledge of Human Activity Recognition (HAR) with MOCAP (motion capture) data. It was made by Nazmun Nahid.
Here we are going to use pandas (https://pandas.pydata.org/docs/user_guide/index.html), numpy (https://numpy.org/devdocs/user/whatisnumpy.html), and matplotlib (https://matplotlib.org/stable/contents.html).
import pandas as pd
import numpy as np
# render plots inline in the notebook and enlarge the default figure size
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (19, 15)
First, we load the data into a pandas DataFrame.
df = pd.read_csv('/content/drive/MyDrive/Tutorial/Tutorial.csv')
Now let's check what information the data contains!
df.head()
 | X1 | Y1 | Z1 | X2 | Y2 | Z2 | X3 | Y3 | Z3 | X4 | Y4 | Z4 | X5 | Y5 | Z5 | X6 | Y6 | Z6 | X7 | Y7 | Z7 | X8 | Y8 | Z8 | X9 | Y9 | Z9 | X10 | Y10 | Z10 | X11 | Y11 | Z11 | X12 | Y12 | Z12 | X13 | Y13 | Z13 | subject_id | activity
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---
0 | 50.21639 | -107.67267 | 1767.61377 | 63.20651 | 29.68195 | 1758.38440 | 51.69193 | -133.12411 | 1621.81348 | 229.51003 | -47.71713 | 1510.75000 | 139.99603 | -172.22476 | 1370.11914 | 503.12790 | -93.84714 | 1388.61548 | 731.14392 | -34.92507 | 1355.24622 | -127.75881 | -38.28164 | 1509.38342 | -403.79730 | -40.54583 | 1415.14868 | -616.61920 | 36.45244 | 1409.43909 | 206.18022 | 20.71629 | 1045.67297 | -83.08297 | 37.08975 | 1040.91321 | 52.28787 | -148.74643 | 1084.47241 | 1 | 1 |
1 | 50.21885 | -107.45515 | 1767.62500 | 63.35519 | 29.99363 | 1758.43018 | 51.70267 | -132.95493 | 1621.84692 | 229.57869 | -47.61509 | 1510.74194 | 140.00488 | -172.15320 | 1370.13770 | 503.11673 | -93.58308 | 1388.44592 | 731.07935 | -34.53156 | 1354.95374 | -127.74041 | -38.12450 | 1509.35669 | -403.76154 | -40.29504 | 1415.08594 | -616.48090 | 36.95655 | 1409.20911 | 206.21725 | 20.64190 | 1045.71252 | -83.05798 | 37.10030 | 1040.91003 | 52.33491 | -148.76173 | 1084.48254 | 1 | 1 |
2 | 50.21194 | -107.24545 | 1767.64001 | 63.39342 | 30.19756 | 1758.41455 | 51.70736 | -132.75502 | 1621.83386 | 229.61604 | -47.48216 | 1510.72375 | 140.01978 | -172.09746 | 1370.17761 | 503.02640 | -92.96793 | 1388.13721 | 731.00787 | -34.12088 | 1354.63635 | -127.71638 | -37.94924 | 1509.33899 | -403.74478 | -39.96909 | 1415.00708 | -616.39703 | 37.45546 | 1409.02454 | 206.25093 | 20.68835 | 1045.64026 | -82.99553 | 37.10056 | 1040.90540 | 52.35517 | -148.73924 | 1084.46460 | 1 | 1 |
3 | 50.18755 | -107.05878 | 1767.63586 | 63.47015 | 30.40905 | 1758.38867 | 51.72849 | -132.58279 | 1621.84436 | 229.66670 | -47.39188 | 1510.69043 | 140.06505 | -172.05089 | 1370.21545 | 503.09833 | -93.07207 | 1388.09070 | 730.94763 | -33.69686 | 1354.32178 | -127.68904 | -37.76253 | 1509.31946 | -403.71774 | -39.68367 | 1414.89624 | -616.23877 | 38.08528 | 1408.53162 | 206.32034 | 20.69071 | 1045.63623 | -82.94318 | 37.10549 | 1040.89172 | 52.40389 | -148.75574 | 1084.43823 | 1 | 1 |
4 | 50.19378 | -106.84532 | 1767.63867 | 63.49923 | 30.60268 | 1758.34790 | 51.70081 | -132.40088 | 1621.84888 | 229.70909 | -47.25908 | 1510.63879 | 140.08215 | -171.98302 | 1370.24597 | 503.12549 | -92.82301 | 1387.94287 | 730.91058 | -33.25237 | 1354.03369 | -127.64147 | -37.60607 | 1509.27502 | -403.65240 | -39.37738 | 1414.77283 | -616.13306 | 38.53448 | 1408.31934 | 206.36885 | 20.66676 | 1045.62207 | -82.89524 | 37.13080 | 1040.87476 | 52.42453 | -148.74034 | 1084.42480 | 1 | 1 |
So the data file has many rows and columns: 13 motion-capture markers, each with X, Y, and Z coordinates, plus subject_id and activity labels. Do you want to know the exact number of rows and columns?
df.shape
(150699, 41)
Now, let's see what the data looks like!
df.plot()
[Line plot of all columns over time]
df['activity'].value_counts().plot.bar()
[Bar chart of the number of samples per activity class]
In the preprocessing stage we first need to focus on missing values. Let's check whether our data has any.
df.isnull().sum().sum()
843
print(df.isnull().sum())
X1              0
Y1              0
Z1              0
X2              0
Y2              0
Z2              0
X3              0
Y3              0
Z3              0
X4            156
Y4            156
Z4            156
X5              0
Y5              0
Z5              0
X6              0
Y6              0
Z6              0
X7             17
Y7             17
Z7             17
X8              5
Y8              5
Z8              5
X9              0
Y9              0
Z9              0
X10             0
Y10             0
Z10             0
X11            21
Y11            21
Z11            21
X12            60
Y12            60
Z12            60
X13            22
Y13            22
Z13            22
subject_id      0
activity        0
dtype: int64
We have some missing values, so we have to keep that in mind while handling the data. To work with this data, we will divide it into smaller segments with a sliding window.
def segmentation(x_data, overlap_rate, time_window):
    # make lists for the segment windows and their labels
    seg_data = []
    y_segmented_list = []
    # convert the overlap rate into the step size of the sliding window
    step = int((1 - overlap_rate) * time_window)
    # cut one window per step and keep the label of its first row
    for i in range(0, x_data.shape[0], step):
        seg_data.append(x_data.iloc[i:i + time_window])
        y_segmented_list.append(x_data['activity'].iloc[i])
    return seg_data, y_segmented_list
# fill missing values by linear interpolation
df1_itpl = df.interpolate()
# replace anything interpolation could not fill (e.g. leading NaNs) with 0
df1_itpl = df1_itpl.fillna(0)
# segmentation with overlap rate = 0.5 and window = 350 samples
seg, seg_label = segmentation(df1_itpl, 0.5, 350)
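As a quick sanity check (not part of the original notebook), we can look at how many segments we got and their shape; with a 50% overlap the step is 175 rows, so we expect about 150699 / 175 ≈ 862 windows, the last few of them shorter than 350 rows.
print(len(seg))        # number of segments (about 862 here)
print(seg[0].shape)    # (350, 41): window length x number of columns
print(seg_label[:5])   # labels of the first few segments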
There are many types of features. For ease of use, we compute only four very common ones per column: standard deviation, average, maximum, and minimum.
def get_features(x_data):
    # set the features list
    features = []
    # set the column-name list
    DFclist = list(x_data.columns)
    # calculate features (std, average, max, min) for each X/Y/Z column
    for k in DFclist:
        features.append(x_data[k].std(ddof=0))  # standard deviation
        features.append(np.average(x_data[k]))  # average
        features.append(np.max(x_data[k]))      # maximum
        features.append(np.min(x_data[k]))      # minimum
    return features
# build the feature matrix and label vector, one entry per segment
features_list = []
label_list = []
for j in range(len(seg)):
    # keep only the X/Y/Z coordinate columns
    frame1 = seg[j].drop(columns=['subject_id', 'activity'])
    # get the features and the label of each segment
    features_list.append(get_features(frame1))
    label_list.append(seg_label[j])
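Each segment yields 4 statistics for each of the 39 coordinate columns, so every feature vector has 4 × 39 = 156 entries. A quick check (not in the original notebook):
print(len(features_list), len(features_list[0]))  # number of segments, 156 features each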
Now we have a feature list and a label list. The next step is classification.
There are several models for classification. Here we use one of the most commonly used ones, Random Forest.
from sklearn.ensemble import RandomForestClassifier
model_ml = RandomForestClassifier(n_estimators=500,n_jobs=-1)
Here we only have one subject, so we will split this subject's data into training and test sets to evaluate the results. With more than one subject, you can instead hold one subject out for testing and train on the others (a sketch of such a subject-wise split follows the code below).
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(features_list, label_list, test_size=0.3, random_state=42)
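A minimal sketch of the subject-wise split mentioned above, assuming a hypothetical subject_list that holds one subject_id per segment (it could be collected inside the feature-extraction loop the same way as label_list):
from sklearn.model_selection import LeaveOneGroupOut

X = np.array(features_list)
y = np.array(label_list)
groups = np.array(subject_list)  # hypothetical: one subject id per segment

# each fold holds out every segment of one subject for testing
logo = LeaveOneGroupOut()
for train_idx, test_idx in logo.split(X, y, groups):
    X_train_s, X_test_s = X[train_idx], X[test_idx]
    y_train_s, y_test_s = y[train_idx], y[test_idx]
    # fit and evaluate the model here, once per held-out subject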
Now let's train the model!
model_ml.fit(X_train, y_train)
RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=500,
                       n_jobs=-1, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)
The training is complete, but how can we see the results? For that we use the classification report, which shows the accuracy, precision, recall, and F1 score for every class. We also use a confusion matrix for the evaluation.
from sklearn.metrics import classification_report
from sklearn.metrics import plot_confusion_matrix
from sklearn.metrics import confusion_matrix
y_predict = model_ml.predict(X_test)
print(classification_report(y_test,y_predict))
#confusion_matrix(y_test, y_predict)
plot_confusion_matrix(model_ml, X_test, y_test)
plt.show()
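Note that plot_confusion_matrix has been removed from recent scikit-learn releases; if the import above fails in your environment, ConfusionMatrixDisplay produces the same plot:
from sklearn.metrics import ConfusionMatrixDisplay
ConfusionMatrixDisplay.from_estimator(model_ml, X_test, y_test)
plt.show()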
              precision    recall  f1-score   support

           1       0.88      0.79      0.83        47
           2       0.87      0.84      0.85        56
           3       0.96      0.90      0.93        52
           4       0.87      0.85      0.86        48
           5       0.78      0.93      0.85        56

    accuracy                           0.86       259
   macro avg       0.87      0.86      0.87       259
weighted avg       0.87      0.86      0.87       259
We have successfully learned to read the data, visualize it, preprocess it, extract features, classify, and evaluate the resulting model. Now it's your turn to build a model following these steps and predict the labels of the test data. Best of luck!
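As a starting point, here is a minimal sketch of that final step. The file name Test.csv is a placeholder, and we assume the test file has the same column layout as the training file (adapt the segmentation if the activity column is absent):
# load and preprocess the test data exactly like the training data
df_test = pd.read_csv('/content/drive/MyDrive/Tutorial/Test.csv')  # placeholder path
df_test_itpl = df_test.interpolate().fillna(0)
seg_test, _ = segmentation(df_test_itpl, 0.5, 350)

# extract the same features from every test segment
test_features = [get_features(s.drop(columns=['subject_id', 'activity']))
                 for s in seg_test]

# predict one activity label per segment
test_predictions = model_ml.predict(test_features)
print(test_predictions[:10])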