"""
Created on Mon Sep 23 23:02:38 2019
@author: Sowmya
Modified by Dijiang Huang 4/19/2020
Modified by Mark Khusid
December 6, 2024
"""

# Import Libraries
import numpy as np
import pandas as pd

# Define Variables
# Data File Path
# Directory holding the NSL-KDD files. The trailing slash matters: the path is
# joined to the file names below by plain string concatenation.
DatasetPath = 'NSL-KDD/'

# Data File Name
input_train = "KDDTrain+.txt"
input_test = "KDDTest+.txt"
# Extension appended to the names of the generated subset files.
file_extension = '.csv'  # .csv or .txt

# Define Number of Attack Classes
num_attack_class = 4  # total number of attack classes

# Define Attack Subclasses
# All attacks in NSL-KDD grouped by attack class: DoS, Probe, U2R, and R2L.
# Attack class number k (1-based) is stored at index k - 1 of this list.
attacks_subClass = [
    # Class 1: DoS
    [
        'apache2',
        'back',
        'land',
        'neptune',
        'mailbomb',
        'pod',
        'processtable',
        'smurf',
        'teardrop',
        'udpstorm',
        'worm',
    ],
    # Class 2: Probe
    [
        'ipsweep',
        'mscan',
        'portsweep',
        'saint',
        'satan',
        'nmap',
    ],
    # Class 3: U2R
    [
        'buffer_overflow',
        'loadmodule',
        'perl',
        'ps',
        'rootkit',
        'sqlattack',
        'xterm',
    ],
    # Class 4: R2L
    [
        'ftp_write',
        'guess_passwd',
        'httptunnel',
        'imap',
        'multihop',
        'named',
        'phf',
        'sendmail',
        'snmpgetattack',
        'spy',
        'snmpguess',
        'warezclient',
        'warezmaster',
        'xlock',
        'xsnoop',
    ],
]

# Load the Datasets
print("Loading", input_train, "and", input_test, "files from the current folder where this script resides.....\n")
# header=None: the first line of each file is read as data, so the columns get
# integer labels instead of names.
dataset_train = pd.read_csv(DatasetPath + input_train, header=None, encoding="ISO-8859-1")
dataset_test = pd.read_csv(DatasetPath + input_test, header=None, encoding="ISO-8859-1")
# Output:
#   Loading KDDTrain+.txt and KDDTest+.txt files from the current folder where this script resides.....

# Notebook-style inspection of the loaded frames. The rendered tables were not
# captured in the export (only a "Loading..." placeholder was).
dataset_test.describe()
dataset_test.head()
dataset_train.describe()
dataset_train.head()
# Create Train and Test Datasets
# Take every row and column of each DataFrame as a NumPy array so records can
# be indexed positionally (e.g. X_train[i, -2]).
X_train = dataset_train.iloc[:, :].values
X_test = dataset_test.iloc[:, :].values
X_train.shape  # -> (125973, 43)
X_test.shape  # -> (22544, 43)

# Define Selected Attack Classes List
# Set1 is selected attack classes
training_attack_class_list = []
training_attack_class_list  # -> []
# Set2 is removed attack classes
testing_attack_class_list = []
testing_attack_class_list  # -> []

# Scenario A (SA)
# Enter the Desired Attack Classes
# Class numbers are 1-based positions in attacks_subClass.
attack_class_1 = [1, 3]
attack_class_1  # -> [1, 3]
attack_class_2 = [2, 4]
attack_class_2  # -> [2, 4]

# Append Desired Attack Classes to Training and Testing Attack Class Lists
# Each list is reset and then holds exactly one entry: the class list chosen
# for that split (read later as <list>[0]).
training_attack_class_list = []
training_attack_class_list.append(attack_class_1)
training_attack_class_list  # -> [[1, 3]]
testing_attack_class_list = []
testing_attack_class_list.append(attack_class_2)
testing_attack_class_list  # -> [[2, 4]]

# Select Subclasses and Save to File
# Create Training Data Set
# Build the training subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Training-a<class>...<file_extension>".
print("Creating training set.....\n")
setA_train = []
selected_train_classes = training_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original training file can be used unchanged.
if selected_train_classes and selected_train_classes[0] != 0 and \
        len(selected_train_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_train_labels = {'normal'}
    for class_id in selected_train_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_train_labels.update(attacks_subClass[class_id - 1])
    # X_train[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_train = [row for row in X_train if str(row[-2]).lower() in wanted_train_labels]
    trainingFileName = "Training" + "".join("-a" + str(class_id) for class_id in selected_train_classes)
    trainingFileName = trainingFileName + file_extension
    np.savetxt(trainingFileName, setA_train, delimiter=',', fmt="%s")
    print("Files " + trainingFileName + " have been created in the same folder this script resides\n")
elif len(selected_train_classes) == num_attack_class:
    print("No changes is needed for training dataset!\n")
else:
    print("No attack classes are chosen, thus no new training file is created!\n")
# Output:
#   Creating training set.....
#   Files Training-a1-a3.csv have been created in the same folder this script resides
# Create Testing Data Set
# Build the testing subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Testing-a<class>...<file_extension>".
print("Creating testing set.....\n")
setA_test = []
selected_test_classes = testing_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original testing file can be used unchanged.
if selected_test_classes and selected_test_classes[0] != 0 and \
        len(selected_test_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_test_labels = {'normal'}
    for class_id in selected_test_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_test_labels.update(attacks_subClass[class_id - 1])
    # Keep the selected attack classes and normal-labelled records in setA_test.
    # X_test[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_test = [row for row in X_test if str(row[-2]).lower() in wanted_test_labels]
    testingFileName = "Testing" + "".join("-a" + str(class_id) for class_id in selected_test_classes)
    testingFileName = testingFileName + file_extension
    np.savetxt(testingFileName, setA_test, delimiter=',', fmt="%s")
    print("Files " + testingFileName + " have been created in the same folder this script resides\n")
elif len(selected_test_classes) == num_attack_class:
    print("No changes is needed for testing dataset!\n")
else:
    # Message corrected: this branch concerns the testing file, not the training file.
    print("No attack classes are chosen, thus no new testing file is created!\n")
# Output:
#   Creating testing set.....
#   Files Testing-a2-a4.csv have been created in the same folder this script resides
# Scenario B (SB)
# Enter the Desired Attack Classes
# Class numbers are 1-based positions in attacks_subClass.
attack_class_1 = [1, 2]
attack_class_1  # -> [1, 2]
attack_class_2 = [1]
attack_class_2  # -> [1]

# Append Desired Attack Classes to Training and Testing Attack Class Lists
# Each list is reset and then holds exactly one entry: the class list chosen
# for that split (read later as <list>[0]).
training_attack_class_list = []
training_attack_class_list.append(attack_class_1)
training_attack_class_list  # -> [[1, 2]]
testing_attack_class_list = []
testing_attack_class_list.append(attack_class_2)
testing_attack_class_list  # -> [[1]]

# Select Subclasses and Save to File
# Create Training Data Set
# Build the training subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Training-a<class>...<file_extension>".
print("Creating training set.....\n")
setA_train = []
selected_train_classes = training_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original training file can be used unchanged.
if selected_train_classes and selected_train_classes[0] != 0 and \
        len(selected_train_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_train_labels = {'normal'}
    for class_id in selected_train_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_train_labels.update(attacks_subClass[class_id - 1])
    # X_train[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_train = [row for row in X_train if str(row[-2]).lower() in wanted_train_labels]
    trainingFileName = "Training" + "".join("-a" + str(class_id) for class_id in selected_train_classes)
    trainingFileName = trainingFileName + file_extension
    np.savetxt(trainingFileName, setA_train, delimiter=',', fmt="%s")
    print("Files " + trainingFileName + " have been created in the same folder this script resides\n")
elif len(selected_train_classes) == num_attack_class:
    print("No changes is needed for training dataset!\n")
else:
    print("No attack classes are chosen, thus no new training file is created!\n")
# Output:
#   Creating training set.....
#   Files Training-a1-a2.csv have been created in the same folder this script resides
# Create Testing Data Set
# Build the testing subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Testing-a<class>...<file_extension>".
print("Creating testing set.....\n")
setA_test = []
selected_test_classes = testing_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original testing file can be used unchanged.
if selected_test_classes and selected_test_classes[0] != 0 and \
        len(selected_test_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_test_labels = {'normal'}
    for class_id in selected_test_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_test_labels.update(attacks_subClass[class_id - 1])
    # Keep the selected attack classes and normal-labelled records in setA_test.
    # X_test[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_test = [row for row in X_test if str(row[-2]).lower() in wanted_test_labels]
    testingFileName = "Testing" + "".join("-a" + str(class_id) for class_id in selected_test_classes)
    testingFileName = testingFileName + file_extension
    np.savetxt(testingFileName, setA_test, delimiter=',', fmt="%s")
    print("Files " + testingFileName + " have been created in the same folder this script resides\n")
elif len(selected_test_classes) == num_attack_class:
    print("No changes is needed for testing dataset!\n")
else:
    # Message corrected: this branch concerns the testing file, not the training file.
    print("No attack classes are chosen, thus no new testing file is created!\n")
# Output:
#   Creating testing set.....
#   Files Testing-a1.csv have been created in the same folder this script resides
# Scenario C (SC)
# Enter the Desired Attack Classes
# Class numbers are 1-based positions in attacks_subClass.
attack_class_1 = [1, 2]
attack_class_1  # -> [1, 2]
attack_class_2 = [1, 2, 3]
attack_class_2  # -> [1, 2, 3]

# Append Desired Attack Classes to Training and Testing Attack Class Lists
# Each list is reset and then holds exactly one entry: the class list chosen
# for that split (read later as <list>[0]).
training_attack_class_list = []
training_attack_class_list.append(attack_class_1)
training_attack_class_list  # -> [[1, 2]]
testing_attack_class_list = []
testing_attack_class_list.append(attack_class_2)
testing_attack_class_list  # -> [[1, 2, 3]]

# Select Subclasses and Save to File
# Create Training Data Set
# Build the training subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Training-a<class>...<file_extension>".
print("Creating training set.....\n")
setA_train = []
selected_train_classes = training_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original training file can be used unchanged.
if selected_train_classes and selected_train_classes[0] != 0 and \
        len(selected_train_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_train_labels = {'normal'}
    for class_id in selected_train_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_train_labels.update(attacks_subClass[class_id - 1])
    # X_train[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_train = [row for row in X_train if str(row[-2]).lower() in wanted_train_labels]
    trainingFileName = "Training" + "".join("-a" + str(class_id) for class_id in selected_train_classes)
    trainingFileName = trainingFileName + file_extension
    np.savetxt(trainingFileName, setA_train, delimiter=',', fmt="%s")
    print("Files " + trainingFileName + " have been created in the same folder this script resides\n")
elif len(selected_train_classes) == num_attack_class:
    print("No changes is needed for training dataset!\n")
else:
    print("No attack classes are chosen, thus no new training file is created!\n")
# Output:
#   Creating training set.....
#   Files Training-a1-a2.csv have been created in the same folder this script resides
# Create Testing Data Set
# Build the testing subset: every 'normal' record plus every record whose
# attack-subclass label (second-to-last column) belongs to one of the selected
# attack classes, then write it to "Testing-a<class>...<file_extension>".
print("Creating testing set.....\n")
setA_test = []
selected_test_classes = testing_attack_class_list[0]
# A leading 0 (or an empty selection) means "no class chosen"; selecting all
# classes means the original testing file can be used unchanged.
if selected_test_classes and selected_test_classes[0] != 0 and \
        len(selected_test_classes) != num_attack_class:
    # Collect the labels to keep once, so each record needs a single set lookup
    # and is written at most once even if a class number is repeated.
    wanted_test_labels = {'normal'}
    for class_id in selected_test_classes:
        # Class numbers are 1-based; attacks_subClass is 0-based.
        wanted_test_labels.update(attacks_subClass[class_id - 1])
    # Keep the selected attack classes and normal-labelled records in setA_test.
    # X_test[i, -2] is the label of the attack subclass (compared lower-cased).
    setA_test = [row for row in X_test if str(row[-2]).lower() in wanted_test_labels]
    testingFileName = "Testing" + "".join("-a" + str(class_id) for class_id in selected_test_classes)
    testingFileName = testingFileName + file_extension
    np.savetxt(testingFileName, setA_test, delimiter=',', fmt="%s")
    print("Files " + testingFileName + " have been created in the same folder this script resides\n")
elif len(selected_test_classes) == num_attack_class:
    print("No changes is needed for testing dataset!\n")
else:
    # Message corrected: this branch concerns the testing file, not the training file.
    print("No attack classes are chosen, thus no new testing file is created!\n")
# Output:
#   Creating testing set.....
#   Files Testing-a1-a2-a3.csv have been created in the same folder this script resides