class: center, middle, inverse, title-slide # Cohen’s Kappa ## EDP 612 Week 8 ### Dr. Abhik Roy --- <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.6.0/jquery.min.js"></script> <script type="text/x-mathjax-config"> /* When the TeX input jax is ready, register cancel, bcancel, xcancel and cancelto as macros supplied by the "cancel" extension. */ MathJax.Hub.Register.StartupHook("TeX Jax Ready",function () { MathJax.Hub.Insert(MathJax.InputJax.TeX.Definitions.macros,{ cancel: ["Extension","cancel"], bcancel: ["Extension","cancel"], xcancel: ["Extension","cancel"], cancelto: ["Extension","cancel"] }); }); </script> <style> /* Flex container for section elements; the prefixed value comes first so the standard one takes precedence. */ section { display: -webkit-flex; display: flex; } /* Fixed-size, centered dark card with rounded corners. */ section { height: 600px; width: 60%; margin: auto; border-radius: 21px; background-color: #212121; } /* Card text. Boldness is set with font-weight; font-style only accepts normal, italic or oblique, so the former "font-style: bold" was ignored. */ section p { text-align: center; font-size: 30px; background-color: #212121; border-radius: 21px; font-family: Roboto Condensed; font-weight: bold; padding: 12px; color: #bff4ee; margin: auto; } #center { text-align: center; } /* Absolutely position content at the middle of its containing block. */ .center p { margin: 0; position: absolute; top: 50%; left: 50%; -ms-transform: translate(-50%, -50%); transform: translate(-50%, -50%); } .center2 { margin: 0; position: absolute; top: 50%; left: 50%; -ms-transform: translate(-50%, -50%); transform: translate(-50%, -50%); } /* Inline indent helper. */ .tab { display: inline-block; margin-left: 40px; } /* Strip all table borders and cell spacing. */ td, th, tr, table { border: 0 !important; border-spacing:0 !important; } </style> <style type="text/css"> /* Dim every top-level list item except the last one inside .highlight-last-item. */ .highlight-last-item > ul > li, .highlight-last-item > ol > li { opacity: 0.5; } .highlight-last-item > ul > li:last-of-type, .highlight-last-item > ol > li:last-of-type { opacity: 1; } </style>
--- class: highlight-last-item layout: true --- # <span style="color:#d9534f;font-weight:bold;">Reliability</span> and <span style="color:#428bca;font-weight:bold;">Validity</span> -- <br> <br> .pull-left[ <center> <span style="color:#d9534f;font-weight:bold;">Reliability</span><br><br><i>being consistent</i> </center> ] -- .pull-right[ <center> <span style="color:#428bca;font-weight:bold;">Validity</span><br><br><i>on target</i> </center> ] --- # Recall Scenarios .pull-left[ <center> <i>Not</i><span style="color:#d9534f;font-weight:bold;"> Reliable</span> and <i>not</i> <span style="color:#428bca;font-weight:bold;"> Valid</span> <br><br> <img src="img/image10.png" alt="norelnoval" style="width: 150px;"/> </center> ] -- .pull-right[ <center> <span style="color:#d9534f;font-weight:bold;">Reliable</span> but <i>not</i> <span style="color:#428bca;font-weight:bold;"> Valid</span> <br><br> <img src="img/image12.png" alt="relnoval" style="width: 150px;"/> </center> ] -- <br> <br> .pull-left[ <center> <i>Not</i><span style="color:#d9534f;font-weight:bold;"> Reliable</span> but <span style="color:#428bca;font-weight:bold;">Valid</span> <br><br> <img src="img/image13.png" alt="norelval" style="width: 150px;"/> </center> ] -- .pull-right[ <center> <span style="color:#d9534f;font-weight:bold;">Reliable</span> and <span style="color:#428bca;font-weight:bold;">Valid</span> <br><br> <img src="img/image14.png" alt="relval" style="width: 150px;"/> </center> ] --- # Basic Tenet <br> <br> .pull-left[ A test can be <span style="color:#d9534f;font-weight:bold;">Reliable</span> without being <span style="color:#428bca;font-weight:bold;">Valid</span> ] -- .pull-right[ A test cannot be <span style="color:#428bca;font-weight:bold;">Valid</span> unless it is <span style="color:#d9534f;font-weight:bold;">Reliable</span> ] --- # <span style="color:#d9534f;font-weight:bold;">Reliability</span> -- <br> <br> .pull-left[ <center> <span style="color: #ffd5b7;font-weight:bold;">Intra-rater</span> 
<span style="color:#d9534f;font-weight:bold;">Reliability</span><br><br><i>the degree of agreement between different measurements done by the <span style="color: #ffd5b7;">same person</span></i> </center> ] -- .pull-right[ <center> <span style="color: #b7e1ff;font-weight:bold;">Inter-rater</span> <span style="color:#d9534f;font-weight:bold;">Reliability</span><br><br><i>the degree of agreement between different measurements done by <span style="color: #b7e1ff;">multiple people</span></i> </center> ] --- # Cohen's Kappa `\(\kappa\)` + *Officially*. Measure of the agreement between two raters who each classify `\(N\)` items into `\(C\)` mutually exclusive categories -- + *Basic idea*. -- + Quantitative measure of reliability for two raters who are rating the same thing -- + With a correction for how often the raters may agree by chance -- + *Lay terms*. Measure of how well different people agree --- ## Evaluating -- <br> <br> .pull-left[ <center> `\(\kappa \leq 0\)` <br><br> random agreement among raters </center> ] -- .pull-right[ <center> `\(\kappa = 1\)` <br><br> complete agreement among raters </center> ] --- ## Decision Matrix -- <center> <img src="img/irr.png" alt="irr table" style="width: 400px;"/> </center> -- .pull-left[ <center> `A` <br><br> <font size="3"><i>Agreement</i></font><br><br> <font size="3">The total number of instances that <span style="color:#7acccc">both Raters</span> said were correct</font> </center> ] -- .pull-right[ <center> `B` <br><br> <font size="3"><i>Disagreement</i></font><br><br> <font size="3">The total number of instances that <span style="color:#b27acc">Rater 2</span> said were incorrect, but <span style="color:#94cc7a">Rater 1</span> said were correct</font> </center> ] -- <br> .pull-left[ <center> `C` <br><br> <font size="3"><i>Disagreement</i></font><br><br> <font size="3">The total number of instances that <span style="color:#94cc7a">Rater 1</span> said were incorrect, but <span style="color:#b27acc">Rater 2</span> said 
were correct</font> </center> ] -- .pull-right[ <center> `D` <br><br> <font size="3"><i>Agreement</i></font><br><br> <font size="3">The total number of instances that <span style="color:#7acccc">both Raters</span> said were incorrect</font> </center> ] --- ## Calculations --- ## Probability of <i>Agreement</i> `\(P_0\)` <br> <br> <center> Number in Full <i>Agreement</i> / Total </center> .center2[ `$$\dfrac{A+D}{A+B+C+D}$$` ] --- ## Probability of <b>Correct</b> Random <i>Agreement</i> `\(P_{correct}\)` <br> <br> <center> Number Correct in Full or Partial <i>Agreement</i> / Total </center> .center2[ `$$\dfrac{A+B}{A+B+C+D}\cdot\dfrac{A+C}{A+B+C+D}$$` ] --- ## Probability of <b>Incorrect</b> Random <i>Agreement</i> `\(P_{incorrect}\)` -- <br> <br> <center> Number Incorrect in Full or Partial <i>Agreement</i> / Total </center> .center2[ `$$\dfrac{C+D}{A+B+C+D}\cdot\dfrac{B+D}{A+B+C+D}$$` ] --- ## Probability of Random <i>Agreement</i> -- .center2[ `$$P_e = P_{correct} + P_{incorrect}$$` ] --- ## Kappa `\(\kappa\)` -- .center2[ `$$\kappa = \dfrac{P_0 - P_e}{1-P_e}$$` ] --- ## Interpretation -- .center2[ <table class=" lightable-paper" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; width: auto !important; margin-left: auto; margin-right: auto;'> <thead> <tr> <th style="text-align:center;color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Value of `\(\kappa\)` </th> <th style="text-align:left;color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Strength of <i>Agreement</i> </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> `\(\leq 0.20\)` </td> <td style="text-align:left;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Poor </td> </tr> <tr> <td style="text-align:center;width: 10em; 
color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> `0.21-0.40` </td> <td style="text-align:left;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Fair </td> </tr> <tr> <td style="text-align:center;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> `0.41-0.60` </td> <td style="text-align:left;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Moderate </td> </tr> <tr> <td style="text-align:center;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> `0.61-0.80` </td> <td style="text-align:left;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Good </td> </tr> <tr> <td style="text-align:center;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> `\(\geq 0.81\)` </td> <td style="text-align:left;width: 10em; color: #ffffff !important;background-color: #212121 !important;vertical-align: middle !important;"> Very Good </td> </tr> </tbody> </table> ] --- # That’s it! Any questions?