from data_load import*
from features import*
from classifier import*
from random_guesser import*
import os
import pandas as pd
import re
import random
# Keywords/tokens whose per-snippet ratio (via string_ratio) becomes one
# feature column each in the data-frame generators.
# NOTE: 'return' is listed twice; the second pass just rewrites the same column.
word_list = [
    'let', 'end', 'defn', 'function', 'fun', 'return',
    'def', 'return', 'check', 'make', '->', '.format',
    'define', '::', 'done', 'type', 'rescue', 'print',
    'elif', 'clone', 'display', '$format', 'echo', 'str',
    'join', '&&', 'val', 'Nil', 'object', '<-',
    '--', 'lambda', 'var', '//', 'tmpl', 'public function',
    'stdlib', '=>', 'final', 'case', 'impl',
]

# Single characters measured with character_ratio (one feature column each).
symbol_list = [
    '$', '^', ',', ';', '&', '|', '!',
    '*', '@', '#', '(', '{', ' ',
]

# Candidate trailing tokens checked with string_end; the resulting columns
# are named '_' + ending.
endings = [
    'end',
    ')',
    '}',
]
def data_frame_generator():
    """Build the training DataFrame of labels, code and feature columns.

    The snippets come from ``code_sucker()`` and their labels from
    ``type_getter()``. Labels are lower-cased, then one feature column is
    appended per entry of:

    * ``word_list``   -- ``string_ratio(word, code)``, column named ``word``
    * ``symbol_list`` -- ``character_ratio(code, char)``, column named ``char``
    * ``endings``     -- ``string_end(ending, code)``, column named
      ``'_' + ending``

    Returns:
        pandas.DataFrame: columns ``Language``, ``Code`` and the feature
        columns, in the order listed above.
    """
    codelist = code_sucker()
    typelist = type_getter()

    # Size the index from the data rather than hard-coding 386 rows, so the
    # function keeps working when the corpus grows or shrinks.
    df = pd.DataFrame(typelist, index=range(len(typelist)))
    df.columns = ["Language"]
    df["Code"] = codelist
    df["Language"] = df["Language"].apply(lambda label: label.lower())

    # The loop variable is bound as a default argument so each callable is
    # self-contained and does not depend on late-binding closure semantics.
    for string in word_list:
        df[string] = df["Code"].apply(
            lambda code, word=string: string_ratio(word, code)
        )

    for char in symbol_list:
        df[char] = df["Code"].apply(
            lambda code, symbol=char: character_ratio(code, symbol)
        )

    for ending in endings:
        df['_' + ending] = df["Code"].apply(
            lambda code, suffix=ending: string_end(suffix, code)
        )

    return df
# Build the training feature frame at module level; it is passed to
# create_xy further down.
df = data_frame_generator()
# Debug aid: uncomment to inspect the first two rows.
#df.head(2)
def tdata_frame_generator():
    """Build the test DataFrame with the same feature columns as training.

    Starts from the rows of ``test.csv``, attaches the snippets returned by
    ``tcode_sucker()`` as the ``Code`` column, then appends one column per
    entry of ``word_list`` (``string_ratio``), ``symbol_list``
    (``character_ratio``) and ``endings`` (``string_end``, column name
    prefixed with ``'_'``).

    Returns:
        pandas.DataFrame: the contents of ``test.csv`` plus ``Code`` and the
        feature columns.
    """
    # Snippets are loaded before the CSV is read, as in the original order.
    snippets = tcode_sucker()
    frame = pd.read_csv("test.csv")
    frame["Code"] = snippets

    # Keyword ratios: one column per word, named after the word itself.
    for word in word_list:
        frame[word] = frame["Code"].apply(lambda src: string_ratio(word, src))

    # Character ratios: one column per symbol.
    for symbol in symbol_list:
        frame[symbol] = frame["Code"].apply(
            lambda src: character_ratio(src, symbol)
        )

    # Ending checks: column names carry a leading underscore.
    for suffix in endings:
        frame['_' + suffix] = frame["Code"].apply(
            lambda src: string_end(suffix, src)
        )

    return frame
# Build the feature frame for the test data (reads test.csv) at module level.
test_df = tdata_frame_generator()
# NOTE(review): create_xy is defined elsewhere. word_list[0] is 'let' and
# 'Language' is the label column name; presumably they mark where the feature
# columns start and which column is the target -- confirm against create_xy.
x_train, x_test, y_train, y_test = create_xy(df, test_df, word_list[0], 'Language')
# scikit-learn estimators used below (imported mid-script, after data prep).
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
# Decision tree with default hyperparameters, handed to run_classifier with
# the train/test split. NOTE(review): run_classifier is defined elsewhere;
# presumably it fits and scores the model -- confirm.
tree = DecisionTreeClassifier()
run_classifier(tree, x_train, x_test, y_train, y_test)
# Same call with a Gaussian naive Bayes model.
gauss = GaussianNB()
run_classifier(gauss, x_train, x_test, y_train, y_test)
# NOTE(review): presumably a random-guess baseline over the test labels --
# confirm against random_guesser.
random_guesser(y_test)
import pickle

# Fit a fresh Gaussian naive Bayes model on the training split so it can be
# persisted to disk.
pickleclf = GaussianNB()
pickleclf.fit(x_train, y_train)

file_Name = "picklefile"

# Context managers make sure each handle is flushed and closed even if
# dump/load raises; previously the read handle was never closed.
with open(file_Name, 'wb') as fileObject:
    pickle.dump(pickleclf, fileObject)

# Load the model back from the file just written. Only unpickle files this
# script produced: pickle.load can execute arbitrary code from untrusted data.
with open(file_Name, 'rb') as fileObject:
    clf = pickle.load(fileObject)