Statistics in SQL: Student’s t-test
DECLARE@tNUMERIC(18,2)–the value of t
DECLARE@dfINT–the degrees of freedom
DECLARE@ScoresTABLE(sampleint,variableint)–the test scores
INSERTINTO@scores (sample,variable)
VALUES
(1,27),(1,30),(1,15),(1,17),(1,21),(1,24),(1,26),(1,20),(1,21),(1,36),
(1,21),(1,31),(1,23),(1,32),(1,30),(1,20),(1,22),(1,32),(1,19),(1,25),
(2,2),(2,23),(2,32),(2,37),(2,14),(2,12),(2,15),(2,23),(2,31),(2,17),
(2,8),(2,13),(2,22),(2,14),(2,22),(2,12),(2,14),(2,22),(2,26),(2,16)
/*
We have two independent samples. sample 1 is the experimental 2 is the control.
They could be the moisture content of barley or the thickness of the shell
of shellfish. It might even be the diameter of tomatoes.
To make things clearer, we’ll put all the calculations into variables
Standard deviation Average number in sample*/
DECLARE@st1NUMERIC(18,2),@x1NUMERIC(18,2),@N1NUMERIC(18,2),
@st2NUMERIC(18,2),@x2NUMERIC(18,2),@N2NUMERIC(18,2);
/* so we get the standard deviation, avarage and count for each sample */
–the experimental
SELECT@st1=StDev(s.variable),@x1=Avg(Convert(NUMERIC(18,2),variable)),@N1=Count(*)
FROM@ScoresASs
WHEREs.sample=1;
–the control
SELECT@st2=StDev(s.variable),@x2=Avg(Convert(NUMERIC(18,2),variable)),@N2=Count(*)
FROM@ScoresASs
WHEREs.sample=2;
–and we do the calculation
SELECT@t=(@x1-@x2)/Sqrt((Square(@st1)/@N1)+(Square(@st2)/@N2)),
@df=@N1+@N2-2
SELECT‘The t value was ‘+Convert(VARCHAR(20),@t)+‘ with ‘+Convert(VARCHAR(20),@df)+‘ degrees of freedom’
/*
Now we just have to look up the critical T values for the degrees of freedom of the test.
*/
;WITHCriticalTValues (Df,p05,p025,p01,p005)
AS
(SELECTDf,p05,p025,p01,p005from(VALUES
(1,12.71,25.45,63.66,127.32),
(2,4.30,6.20,9.92,14.09),
(3,3.18,4.17,5.84,7.45),
(4,2.78,3.50,4.60,5.60),
(5,2.57,3.16,4.03,4.77),
(6,2.45,2.97,3.71,4.32),
(7,2.36,2.84,3.50,4.03),
(8,2.31,2.75,3.36,3.83),
(9,2.26,2.68,3.25,3.69),
(10,2.23,2.63,3.17,3.58),
(11,2.20,2.59,3.11,3.50),
(12,2.18,2.56,3.05,3.43),
(13,2.16,2.53,3.01,3.37),
(14,2.14,2.51,2.98,3.33),
(15,2.13,2.49,2.95,3.29),
(16,2.12,2.47,2.92,3.25),
(17,2.11,2.46,2.90,3.22),
(18,2.10,2.44,2.88,3.20),
(19,2.09,2.43,2.86,3.17),
(20,2.09,2.42,2.84,3.15),
(21,2.08,2.41,2.83,3.14),
(22,2.07,2.41,2.82,3.12),
(23,2.07,2.40,2.81,3.10),
(24,2.06,2.39,2.80,3.09),
(25,2.06,2.38,2.79,3.08),
(26,2.06,2.38,2.78,3.07),
(27,2.05,2.37,2.77,3.06),
(28,2.05,2.37,2.76,3.05),
(29,2.04,2.36,2.76,3.04),
(30,2.04,2.36,2.75,3.03),
(40,2.02,2.33,2.70,2.97),
(60,2.00,2.30,2.66,2.92),
(120,1.98,2.27,2.62,2.86),
(10000,1.96,2.24,2.58,2.81))CriticalValues(Df,p05,p025,p01,p005))
SELECT‘The t value was’+
CASEWHEN@t>CriticalTValues.p005THEN‘ highly significant at the 0.005 level’
WHEN@t>CriticalTValues.p01THEN‘ significant at the 0.01 level’
WHEN@t>CriticalTValues.p025THEN‘ significant at the 0.025 level’
WHEN@t>CriticalTValues.p05THEN‘ just significant at the 0.05 level’
ELSE ‘n’‘t significant’end
FROMCriticalTValues–and get the lowest version with sufficient degrees of freedom.
WHEREdf=(SELECTMin(df)FROM CriticalTValuesctetWHEREctet.df>=@df)