-
머신러닝 개발일지 001
카테고리 없음 · 2022. 1. 20. 09:43

import os
os.environ['KAGGLE_USERNAME'] = 'whitesmithdp' # username
os.environ['KAGGLE_KEY'] = '<YOUR_KAGGLE_KEY>' # key

일단 아이디와 키값
(주의: 실제 API 키는 공개 글에 그대로 올리지 말 것. 이미 노출된 키는 Kaggle 계정 설정에서 폐기 후 재발급해야 한다.)

!kaggle datasets download -d rsadiq/salary
데이터셋 다운
!unzip salary.zip

다운받은 데이터셋 언팩

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam, SGD
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

df = pd.read_csv('Salary.csv')
df.tail(5)

패키지들

x_data = np.array(df['YearsExperience'], dtype=np.float32)
y_data = np.array(df['Salary'], dtype=np.float32)
# Reshape the flat feature/target vectors into column matrices of shape
# (n_samples, 1) so every sample is a one-element row.
x_data = x_data.reshape((-1, 1))
y_data = y_data.reshape((-1, 1))
print(x_data.shape)
print(y_data.shape)

# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split reproducible between runs.
x_train, x_val, y_train, y_val = train_test_split(
    x_data, y_data, test_size=0.2, random_state=2021
)
print(x_train.shape, x_val.shape)
print(y_train.shape, y_val.shape)

# One Dense unit with no activation: a plain linear model y = w * x + b.
model = Sequential([Dense(1)])
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# and rejected by newer Keras releases.
model.compile(loss='mean_squared_error', optimizer=SGD(learning_rate=0.01))
model.fit(
    x_train,
    y_train,
    validation_data=(x_val, y_val), # 검증 데이터를 넣어주면 한 epoch이 끝날때마다 자동으로 검증
    epochs=100 # epochs 복수형으로 쓰기!
)

데이터셋 활용
loss의 mean_squared_error를 mean_absolute_error로 변경
lr (learning rate) 숫자 변경하여 적용

y_pred = model.predict(x_val)
plt.scatter(x_val, y_val)
plt.scatter(x_val, y_pred, color='r')
plt.show()

붉게 표시되는 것이 함수를 통해 예상하는 값