import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Load the dataset
data = pd.read_csv('data.csv')

# Split the dataset into features and labels:
# every column except 'label' is a feature, 'label' is the target.
X = data.drop('label', axis=1)
y = data['label']

# Split the data into training and testing sets.
# test_size=0.2 holds out 20% of the rows for evaluation; the fixed
# random_state makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Train a logistic regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Evaluate the model on the held-out test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
4. 文件读写
做 ML 时经常要读数据集、保存处理结果、写日志或者保存配置。最基本的是理解 with open(...) 的写法,它能保证文件用完后正确关闭。
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
def write_to_file(filename, content):
    """Write *content* to *filename*, replacing any existing contents.

    The file is opened in text mode ``'w'``, so it is created if missing and
    truncated if it already exists. The ``with`` statement closes the file
    even if the write fails.
    """
    with open(filename, 'w') as file:
        file.write(content)
def read_from_file(filename):
    """Return the entire contents of *filename* as a string.

    The file is opened in text mode ``'r'``; the ``with`` statement closes it
    once the contents have been read. Errors raised by ``open`` (for example
    when the file does not exist) propagate to the caller.
    """
    with open(filename, 'r') as file:
        return file.read()
def write_to_file(filename, content):
    """Write *content* to *filename* only if the file does not exist yet.

    When *filename* already exists, a notice is printed and the existing
    file is left untouched. Otherwise the file is created, *content* is
    written to it, and a confirmation is printed.
    """
    try:
        # Mode 'x' creates the file and fails if it already exists, so the
        # existence check and the open happen in a single step. This avoids
        # the race of checking first and opening afterwards, and removes the
        # dependency on the `os` module.
        with open(filename, 'x') as file:
            file.write(content)
    except FileExistsError:
        print(f"File '{filename}' already exists.")
    else:
        print(f"Content '{content}' written to '{filename}'.")
def write_to_file(filename, content):
    """Write *content* to *filename*, reporting I/O failures instead of raising.

    The file is opened in mode ``'w'`` (created if missing, truncated if
    present). On success a confirmation is printed; if opening or writing
    fails with an OS-level error, the error is printed and the function
    returns normally.
    """
    try:
        with open(filename, 'w') as file:
            file.write(content)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error writing to '{filename}': {e}")
    else:
        print(f"Content written to '{filename}' successfully.")
def read_from_file(filename):
    """Return the contents of *filename*, or ``None`` if it cannot be read.

    A missing file and other OS-level read errors are reported with a
    printed message rather than raised; in both cases the function returns
    ``None``.
    """
    try:
        with open(filename, 'r') as file:
            return file.read()
    except FileNotFoundError:
        # Handled before the generic OSError case so the message is specific.
        print(f"File '{filename}' not found.")
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error reading from '{filename}': {e}")
    # Reached only after a handled error: make the "no data" result explicit.
    return None
def append_to_file(filename, content):
    """Append *content* to the end of *filename*, reporting I/O failures.

    The file is opened in mode ``'a'``, so it is created if missing and
    existing contents are kept. On success a confirmation is printed; if
    opening or writing fails with an OS-level error, the error is printed
    and the function returns normally.
    """
    try:
        with open(filename, 'a') as file:
            file.write(content)
    except OSError as e:  # IOError is an alias of OSError in Python 3
        print(f"Error appending to '{filename}': {e}")
    else:
        print(f"Content appended to '{filename}' successfully.")
# Demo: create the file, append a second line, then print what was stored.
filename = "example.txt"
write_to_file(filename, "Hello, World!")
append_to_file(filename, "\nAppending some more content!")
print(read_from_file(filename))
# Plot the data and regression line
# NOTE(review): plt, X, y, X_new and y_pred are not defined in this snippet;
# presumably they come from an earlier part of the example - confirm.
plt.scatter(X, y, color='blue', label='Data')
plt.plot(X_new, y_pred, color='red', linewidth=2, label='Regression Line')
plt.xlabel('X')
plt.ylabel('y')
plt.legend()
plt.show()
写 ML 代码时,错误不一定来自算法,也可能来自数据维度、文件路径、包版本、数据类型。先掌握基础调试方法很重要。
常见错误大致可以分为:
语法错误:代码本身不符合 Python 语法。
运行时错误:比如文件不存在、数组越界、类型不匹配。
逻辑错误:代码能跑,但结果不对,这类最难查。
异常处理的基本写法如下:
1 2 3 4 5 6 7
try:
    # Code that might raise an exception
    result = 10 / 0
except ZeroDivisionError:
    # The specific handler comes first so it wins over the catch-all below.
    print("Cannot divide by zero")
except Exception as e:
    # Fallback for anything else the try body might raise.
    print("Unexpected error:", e)
也可以用 logging 代替大量 print,尤其是训练过程比较长的时候:
1 2 3 4 5 6 7 8
import logging
# Configure the root logger so that messages of level DEBUG and above are shown.
logging.basicConfig(level=logging.DEBUG)

# One message per severity level, from least to most severe.
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')