"""Toy text-classification demo: TF-IDF features + logistic regression.

Fits a TF-IDF vectorizer and a logistic-regression classifier on the first
three sample sentences, then prints the predicted labels for the last two.
"""
import sys

from sklearn.feature_extraction.text import TfidfVectorizer
# Public import path; the private ``sklearn.linear_model.logistic`` module
# was deprecated and later removed from scikit-learn.
from sklearn.linear_model import LogisticRegression

if sys.version_info[0] == 2:
    # Legacy Python 2 default-encoding hack, preserved for that interpreter
    # only. ``reload`` is not a builtin on Python 3, where str is already
    # Unicode, so the branch is skipped there.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Sample sentences: the leading entries are used for training and the
# trailing N_TEST entries are held out for prediction.
x = [
    'fuck you',
    'fuck you all',
    'hello everyon',
    'fuck him',
    'hello body',
]

# One label per training sentence (presumably 1 = abusive, 0 = benign --
# confirm with the data owner).
y = [1, 1, 0]

# Number of trailing sentences in ``x`` that are held out as the test set.
N_TEST = 2

vectorizer = TfidfVectorizer()
# Learn the vocabulary and IDF weights from the training sentences only, then
# reuse that fitted vectorizer to encode the held-out sentences.
x_train = vectorizer.fit_transform(x[:-N_TEST])
x_test = vectorizer.transform(x[-N_TEST:])

classifier = LogisticRegression()
classifier.fit(x_train, y)

predictions = classifier.predict(x_test)
# Single-argument form prints identically on Python 2 and Python 3.
print(predictions)