Google Colab

วิเคราะห์สถิติอย่างง่าย

#Python สำหรับวิเคราะห์ผลวิจัยด้วยสถิติอย่างง่าย

ตัวอย่าง เป็นการวิเคราะห์ข้อมูลนักเรียน 30 คน

ค่าที่ต้องการวิเคราะห์ คือ ค่าเฉลี่ยของ Pre-test และ Post-test และ t-test ที่ระดับนัยสำคัญ .05 เพื่อตรวจสอบสมมติฐานว่า Post-test แตกต่างจาก Pre-test หรือไม่

🔮 #วิธีการนำไปใช้

1. คัดลอก Code ด้านล่าง Post นี้

2. ไปที่ https://colab.research.google.com/

3. วาง Code และกด Run เพื่อเริ่มทำงาน

***เทียบผลการใช้ Code กับ Jamovi.org ให้ดูในอีกภาพด้วยค่ะ

🔮 แบบที่ 1 แบบแก้จำนวนผู้เรียนและคะแนนที่ได้ใน Code โดยตรง

คัดลอก Code ที่ด้านล่างนี้

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from scipy import stats

# ---------------------------------------------------------
# Part 1: Prepare the data (scores of 30 students)
# ---------------------------------------------------------
# Each list holds one value per student, in the same order as Student_ID.
data = {
    # Student IDs S01 .. S30, generated automatically.
    'Student_ID': [f'S{i:02d}' for i in range(1, 31)],
    'Pre_test': [
        8, 10, 7, 12, 9, 6, 11, 13, 8, 7,
        9, 10, 8, 5, 11, 12, 9, 10, 6, 8,
        14, 7, 9, 10, 11, 8, 6, 12, 9, 7,
    ],  # this closing bracket and comma were missing, making the dict a SyntaxError
    'Post_test': [
        15, 17, 14, 18, 16, 12, 18, 19, 15, 13,
        16, 17, 15, 11, 18, 19, 16, 17, 12, 15,
        19, 14, 16, 18, 18, 15, 13, 19, 16, 14,
    ],
}

# Convert the dict into a DataFrame (one row per student).
df = pd.DataFrame(data)

# Print the score table; whether long output is truncated is decided by
# pandas' display options, not by this script.
print("--- ตัวอย่างตารางคะแนนของผู้เรียน (30 คน) ---")
print(df)
print("-" * 30)

# ---------------------------------------------------------
# Part 2: Descriptive statistics (Mean & S.D.)
# ---------------------------------------------------------
pre_scores = df['Pre_test']
post_scores = df['Post_test']

# Before the lesson
pre_mean, pre_sd = pre_scores.mean(), pre_scores.std()

# After the lesson
post_mean, post_sd = post_scores.mean(), post_scores.std()

# Per-student gain, stored as an extra column on the table
df['Diff'] = post_scores - pre_scores

# ---------------------------------------------------------
# Part 3: (Optional) paired-samples t-test, Post-test vs Pre-test
# ---------------------------------------------------------
t_statistic, p_value = stats.ttest_rel(post_scores, pre_scores)

# ---------------------------------------------------------
# Part 4: Report the results
# ---------------------------------------------------------
print(f"\nสรุปผลการวิเคราะห์ข้อมูล (N = {len(df)})")

print("1. คะแนนก่อนเรียน (Pre-test):")
print(f" - ค่าเฉลี่ย (Mean) = {pre_mean:.2f}")
print(f" - ส่วนเบี่ยงเบนมาตรฐาน (S.D.) = {pre_sd:.2f}")

print("\n2. คะแนนหลังเรียน (Post-test):")
print(f" - ค่าเฉลี่ย (Mean) = {post_mean:.2f}")
print(f" - ส่วนเบี่ยงเบนมาตรฐาน (S.D.) = {post_sd:.2f}")

print("\n3. ผลการเปรียบเทียบ (Paired t-test):")
print(f" - ค่า t-statistic = {t_statistic:.4f}")
print(f" - ค่า p-value = {p_value:.4f}")  # four decimal places

# The p-value is two-sided, so it only says the scores differ. The direction
# comes from the sign of t, which is computed as Post-test minus Pre-test.
if p_value < 0.05:
    if t_statistic > 0:
        print(" ** สรุป: คะแนนหลังเรียนสูงกว่าก่อนเรียน อย่างมีนัยสำคัญทางสถิติที่ระดับ .05")
    else:
        print(" ** สรุป: คะแนนหลังเรียนต่ำกว่าก่อนเรียน อย่างมีนัยสำคัญทางสถิติที่ระดับ .05")
else:
    print(" ** สรุป: คะแนนหลังเรียนไม่แตกต่างจากก่อนเรียนอย่างมีนัยสำคัญ")

# ---------------------------------------------------------
# Part 5: Draw the comparison bar chart
# ---------------------------------------------------------
labels = ['Pre-test', 'Post-test']
means = [pre_mean, post_mean]
errors = [pre_sd, post_sd]  # S.D. is drawn as the error bar

fig, ax = plt.subplots(figsize=(6, 5))

# One bar per test, in soft red / blue so the two are easy to tell apart.
bars = ax.bar(labels, means, yerr=errors, align='center', alpha=0.8,
              ecolor='black', capsize=10, color=['#ff9999', '#66b3ff'])

ax.set_ylabel('Scores (Full Score = 20)')
# Take N from the table instead of hard-coding 30, so the title stays correct
# when the score lists are edited.
ax.set_title(f'Comparison of Pre-test and Post-test Scores (N={len(df)})')
ax.yaxis.grid(True, linestyle='--', alpha=0.7)

# Write each mean just above the top of its bar.
for bar in bars:
    height = bar.get_height()
    ax.annotate(f'{height:.2f}',
                xy=(bar.get_x() + bar.get_width() / 2, height),
                xytext=(0, 3),
                textcoords="offset points",
                ha='center', va='bottom', fontweight='bold')

plt.tight_layout()
plt.show()

🔮 แบบที่ 2 Code สำหรับอัปโหลดคะแนนนักเรียนแบบข้อมูล csv

📋 ตัวอย่างไฟล์คะแนนสำหรับดาวน์โหลด csv ไปทดลอง https://docs.google.com/spreadsheets/d/1YurDZXzldtotluglu8C44pRcBjT9TxwO-Iip48xsz78/edit?usp=sharing

import pandas as pd

import numpy as np

import matplotlib.pyplot as plt

from scipy import stats

import io

# ==========================================
# Part 1: File import
# ==========================================
# Only the import itself is guarded, so an ImportError raised later (for
# example while parsing the CSV) is not mistaken for "not running on Colab".
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    # Running on Google Colab: let the user pick a file through the upload widget.
    print("Step 1: กรุณาเลือกไฟล์ CSV ที่ต้องการวิเคราะห์ (Upload):")
    uploaded = files.upload()

    if not uploaded:
        print("!! คุณไม่ได้เลือกไฟล์ กรุณารันโปรแกรมใหม่อีกครั้ง")
        # SystemExit instead of exit(): exit() is a helper meant for the
        # interactive shell and should not be relied on to stop a program here.
        raise SystemExit(1)

    # Use the first uploaded file; its content is held in memory as bytes.
    filename = next(iter(uploaded))
    df = pd.read_csv(io.BytesIO(uploaded[filename]))
    print(f"\n--> อ่านไฟล์ '{filename}' เรียบร้อยแล้ว")
else:
    # Not on Colab: ask for a path on the local file system instead.
    print("Step 1: ระบบตรวจพบว่าไม่ได้รันบน Google Colab")
    file_path = input("กรุณาระบุตำแหน่งไฟล์ CSV (เช่น student_scores.csv): ")
    try:
        df = pd.read_csv(file_path)
    except FileNotFoundError:
        print(f"!! ไม่พบไฟล์: {file_path}")
        raise SystemExit(1) from None

# ==========================================
# Part 2: Choose how to view the data
# ==========================================
print("\n" + "="*50)
print("Step 2: เลือกรูปแบบการแสดงผลตารางข้อมูล")
print(" กด 1 -> ดูตัวอย่างแค่ 5 แถวแรก (Default)")
print(" กด 2 -> ดูคะแนนของนักเรียนทุกคน (ทั้งหมด)")

view_option = input("เลือกหมายเลข (1 หรือ 2): ")

print(f"\n--- ตารางคะแนน (จำนวนนักเรียน N = {len(df)}) ---")

if view_option.strip() == '2':
    # to_string() renders every row; a plain print(df) is shortened by pandas
    # once the table is longer than its display.max_rows option.
    print(df.to_string())
else:
    # Any answer other than '2' falls back to the 5-row preview.
    print(df.head())

print("="*50)

# ==========================================
# Part 3: Compute and test the statistics
# ==========================================
required_cols = ['Pre_test', 'Post_test']

if not all(col in df.columns for col in required_cols):
    print(f"ERROR: ไฟล์ CSV ต้องมีคอลัมน์ชื่อ {required_cols}")
else:
    # A paired test needs both scores for every student. Rows with a missing
    # score are dropped up front; otherwise ttest_rel returns NaN and the
    # comparison below would silently report "no difference".
    paired = df[required_cols].dropna()
    n_dropped = len(df) - len(paired)
    if n_dropped:
        print(f"หมายเหตุ: ข้ามข้อมูล {n_dropped} แถวที่คะแนน Pre_test หรือ Post_test ไม่ครบ")

    # 1. Mean and standard deviation of each test
    pre_mean = paired['Pre_test'].mean()
    pre_sd = paired['Pre_test'].std()
    post_mean = paired['Post_test'].mean()
    post_sd = paired['Post_test'].std()

    # 2. Paired t-test (Post-test minus Pre-test)
    t_statistic, p_value = stats.ttest_rel(paired['Post_test'], paired['Pre_test'])

    # 3. Text summary
    print("\nStep 3: สรุปผลการวิเคราะห์สถิติ")
    print(f"1. คะแนนก่อนเรียน (Pre-test): Mean = {pre_mean:.2f}, S.D. = {pre_sd:.2f}")
    print(f"2. คะแนนหลังเรียน (Post-test): Mean = {post_mean:.2f}, S.D. = {post_sd:.2f}")

    print("\n--- ผลการเปรียบเทียบ (Paired t-test) ---")
    print(f" t-statistic = {t_statistic:.4f}")
    print(f" p-value = {p_value:.4f}")

    # The p-value is two-sided; the sign of t gives the direction of the change.
    if p_value < 0.05:
        if t_statistic > 0:
            print(" ** ผลสรุป: คะแนนหลังเรียน 'สูงกว่า' ก่อนเรียน อย่างมีนัยสำคัญทางสถิติ (p < .05)")
        else:
            print(" ** ผลสรุป: คะแนนหลังเรียน 'ต่ำกว่า' ก่อนเรียน อย่างมีนัยสำคัญทางสถิติ (p < .05)")
    else:
        print(" ** ผลสรุป: คะแนนหลังเรียน 'ไม่แตกต่าง' จากก่อนเรียน")

    # ==========================================
    # Part 4: Bar chart
    # ==========================================
    # Kept inside the else-branch: the chart needs the statistics above, which
    # do not exist when the required columns are missing.
    print("\nStep 4: กำลังสร้างกราฟเปรียบเทียบ...")

    # Data for the chart
    labels = ['Pre-test', 'Post-test']
    means = [pre_mean, post_mean]
    errors = [pre_sd, post_sd]  # S.D. is drawn as the error bar

    fig, ax = plt.subplots(figsize=(8, 6))

    # Draw the bars (light red and blue)
    bars = ax.bar(labels, means, yerr=errors, align='center', alpha=0.9,
                  ecolor='black', capsize=10, width=0.6,
                  color=['#ff9999', '#66b3ff'])

    # Axis label, title and grid; N counts the students actually analysed.
    ax.set_ylabel('Scores')
    ax.set_title(f'Comparison of Pre-test and Post-test Scores (N={len(paired)})')
    ax.yaxis.grid(True, linestyle='--', alpha=0.5)

    # Write each mean just above the top of its bar.
    for bar in bars:
        height = bar.get_height()
        ax.annotate(f'{height:.2f}',
                    xy=(bar.get_x() + bar.get_width() / 2, height),
                    xytext=(0, 5),  # shift the label up by 5 points
                    textcoords="offset points",
                    ha='center', va='bottom', fontsize=12, fontweight='bold')

    plt.tight_layout()
    plt.show()  # display the chart

ตัวอย่างคะแนนสำหรับหาค่าเฉลี่ย SD สำหรับกลุ่มเป้าหมาย 1 ห้องวิจัยในชั้นเรียน

Google Sites

Report abuse