Basic usage of the package.
First, let’s create 5 clusters normally distributed around 1 to 5, with sd of 0.3:
## id V1 V2 true_clust
## 1 1 1.0261098 0.8789749 1
## 2 2 1.2082016 1.3878250 1
## 3 3 0.4074118 1.3766459 1
## 4 4 0.6051425 0.6178642 1
## 5 5 0.9524910 1.1564759 1
## 6 6 1.0521462 1.1003523 1
## 7 7 1.0824339 0.9615825 1
## 8 8 1.1844157 1.1233981 1
## 9 9 1.0455999 0.7943182 1
## 10 10 0.7854149 1.0792299 1
## 11 11 1.0210792 1.0148444 1
## 12 12 0.4673453 1.7059383 1
## 13 13 1.1874463 1.3096575 1
## 14 14 0.8660050 1.4647951 1
## 15 15 0.6774450 1.2602245 1
## 16 16 1.1722299 1.2319227 1
## 17 17 0.5210028 1.3175402 1
## 18 18 0.8446220 0.9294415 1
## 19 19 0.7843899 1.3028062 1
## 20 20 1.0987512 1.1828475 1
## 21 21 0.7061653 0.9081029 1
## 22 22 0.8097757 1.5410117 1
## 23 23 1.0355111 1.1280316 1
## 24 24 0.7933902 0.9487525 1
## 25 25 0.9196697 0.5746029 1
## 26 26 1.1158750 0.9110239 1
## 27 27 1.0225545 1.2589772 1
## 28 28 0.5656501 1.0978835 1
## 29 29 1.4112862 0.8262509 1
## 30 30 0.8374893 0.8430044 1
## 31 31 1.2817694 1.0326764 1
## 32 32 0.6429815 1.0066302 1
## 33 33 1.0422322 0.8143470 1
## 34 34 1.1554849 0.7581531 1
## 35 35 0.7382609 0.6445084 1
## 36 36 0.6174064 1.5658486 1
## 37 37 1.2322509 0.9360519 1
## 38 38 1.0664780 0.4662921 1
## 39 39 1.5402956 0.9916549 1
## 40 40 0.9002048 1.1083222 1
## 41 41 0.6686538 0.8906331 1
## 42 42 1.1649300 0.5197539 1
## 43 43 0.4598309 1.3919587 1
## 44 44 1.2098375 0.9882193 1
## 45 45 1.6444289 1.1863258 1
## 46 46 0.9009659 0.8880477 1
## 47 47 1.1907833 0.8702168 1
## 48 48 0.6477844 0.9888838 1
## 49 49 1.1872958 1.4506953 1
## 50 50 0.9209193 1.3881039 1
## 51 51 1.0306521 1.1531862 1
## 52 52 1.0165386 1.5388289 1
## 53 53 1.3660712 0.8130430 1
## 54 54 0.7028710 0.5542610 1
## 55 55 1.3475285 1.6116860 1
## 56 56 1.4838698 0.8363814 1
## 57 57 1.4701719 1.0088474 1
## 58 58 0.8452572 1.0180214 1
## 59 59 1.3778488 1.1640458 1
## 60 60 0.5782077 1.1141623 1
## 61 61 1.5977919 1.1992580 1
## 62 62 1.3806729 0.8147248 1
## 63 63 1.0612339 1.1848853 1
## 64 64 0.8444858 1.0577277 1
## 65 65 0.4775080 1.1730890 1
## 66 66 1.0170843 1.7271554 1
## 67 67 0.8116216 0.9659902 1
## 68 68 0.9547799 1.1695377 1
## 69 69 1.1267423 0.8728154 1
## 70 70 0.7633143 0.9193829 1
## 71 71 1.1200924 1.0618220 1
## 72 72 1.2760440 1.3197330 1
## 73 73 1.3331154 0.8181071 1
## 74 74 0.6103232 1.2971236 1
## 75 75 1.0733139 0.6211208 1
## 76 76 1.3590314 0.7965386 1
## 77 77 1.0956969 0.9418009 1
## 78 78 0.7798725 1.0848282 1
## 79 79 0.8840384 1.2756488 1
## 80 80 0.9639136 1.0888227 1
## 81 81 0.9828408 0.8680878 1
## 82 82 1.3777760 0.5978993 1
## 83 83 1.4223231 1.2930153 1
## 84 84 0.6977858 1.3715870 1
## 85 85 0.8497892 1.3137351 1
## 86 86 0.7315108 1.7112728 1
## 87 87 1.2868578 1.0365225 1
## 88 88 1.2382604 1.1371517 1
## 89 89 0.7734909 0.9699200 1
## 90 90 1.2163963 1.1401624 1
## 91 91 1.5565348 1.0742591 1
## 92 92 0.8407294 1.1583621 1
## 93 93 0.6293467 1.3518135 1
## 94 94 0.9492449 1.1646492 1
## 95 95 1.0176978 0.8171222 1
## 96 96 0.7293823 1.4980478 1
## 97 97 1.1091045 0.9631714 1
## 98 98 1.1203562 0.5722272 1
## 99 99 1.0120280 0.6676874 1
## 100 100 0.8652859 0.9072811 1
## 101 101 2.2507390 1.9734441 2
## 102 102 1.7111898 1.4200256 2
## 103 103 1.9420641 2.1271098 2
## 104 104 1.8473650 2.2720518 2
## 105 105 2.3362771 1.5908843 2
## 106 106 1.7730098 2.5610983 2
## 107 107 1.9314666 2.3425966 2
## 108 108 1.5556423 2.2254545 2
## 109 109 1.7456221 2.0292042 2
## 110 110 2.2823768 2.0903847 2
## 111 111 1.6564114 1.7883869 2
## 112 112 2.0871240 2.0333653 2
## 113 113 2.3165806 1.5842302 2
## 114 114 2.0965648 2.0872073 2
## 115 115 1.4835689 1.8331407 2
## 116 116 2.5872570 2.2380236 2
## 117 117 2.6848959 2.2263639 2
## 118 118 2.0350289 2.1211310 2
## 119 119 1.7968176 2.5712147 2
## 120 120 2.3919229 1.7891205 2
## 121 121 2.0821343 1.8197605 2
## 122 122 2.4191544 2.2517732 2
## 123 123 2.4019949 1.7326654 2
## 124 124 2.5926465 2.5594668 2
## 125 125 1.9408834 2.3052861 2
## 126 126 1.5549761 1.9005603 2
## 127 127 2.1191262 1.6481778 2
## 128 128 2.1825576 1.8092879 2
## 129 129 2.0836692 2.3271355 2
## 130 130 1.9312853 2.2593607 2
## 131 131 1.9074311 2.1591312 2
## 132 132 2.0672354 1.3796532 2
## 133 133 1.8630051 1.9181871 2
## 134 134 2.1546865 2.5561643 2
## 135 135 1.5212970 2.4327141 2
## 136 136 1.6934812 1.5337411 2
## 137 137 2.0074968 1.9250483 2
## 138 138 2.3650280 2.4040551 2
## 139 139 1.9946912 1.8481383 2
## 140 140 2.4641530 2.3764864 2
## 141 141 1.6062787 2.0116725 2
## 142 142 1.9332370 1.9134960 2
## 143 143 1.6034844 2.1019813 2
## 144 144 1.8774210 2.0600012 2
## 145 145 1.6196067 1.6925646 2
## 146 146 2.5005191 1.8960383 2
## 147 147 1.5326075 1.9784592 2
## 148 148 2.1294801 2.0637206 2
## 149 149 1.7302110 1.6931117 2
## 150 150 1.7683858 2.0212679 2
## 151 151 1.6265316 2.2111318 2
## 152 152 1.6412212 1.8536057 2
## 153 153 2.0175564 1.9695532 2
## 154 154 1.4046822 1.7861228 2
## 155 155 1.7118952 1.8946449 2
## 156 156 2.0159079 2.2202097 2
## 157 157 1.9978108 1.9627807 2
## 158 158 1.7467426 2.2380811 2
## 159 159 1.9664537 1.8383967 2
## 160 160 2.4394158 1.9986309 2
## 161 161 2.3159907 2.0334511 2
## 162 162 1.4274191 2.0114730 2
## 163 163 1.7181576 1.8792156 2
## 164 164 1.7026063 1.8163556 2
## 165 165 1.5779771 2.2480382 2
## 166 166 1.9203426 2.3704888 2
## 167 167 2.0069635 1.8451700 2
## 168 168 1.8325267 1.7415076 2
## 169 169 1.3455124 1.7951035 2
## 170 170 1.5983914 1.7675893 2
## 171 171 1.9731544 2.0227249 2
## 172 172 2.4525061 1.8036962 2
## 173 173 1.5473594 1.9714050 2
## 174 174 1.9270898 2.0394617 2
## 175 175 1.9160513 1.8629466 2
## 176 176 1.5318938 2.3317852 2
## 177 177 1.4961472 1.8122467 2
## 178 178 1.8877376 1.8662566 2
## 179 179 1.8301150 1.9615677 2
## 180 180 1.8450436 2.1222798 2
## 181 181 2.2311471 1.8595561 2
## 182 182 2.0098754 2.6510669 2
## 183 183 2.1456931 2.1631415 2
## 184 184 1.9221451 1.8618524 2
## 185 185 2.3252137 2.2418015 2
## 186 186 2.2216200 1.9485285 2
## 187 187 2.0332722 1.4354763 2
## 188 188 1.8062872 2.0424980 2
## 189 189 2.1704762 1.9608621 2
## 190 190 2.2995006 1.4887770 2
## 191 191 1.3853784 1.7359899 2
## 192 192 2.7447421 2.0566132 2
## 193 193 2.0100253 1.7841727 2
## 194 194 2.1998917 1.7847640 2
## 195 195 1.8997662 2.2572201 2
## 196 196 2.1560595 1.5400715 2
## 197 197 2.1952147 1.8842155 2
## 198 198 1.9428259 1.7772452 2
## 199 199 1.8721822 2.1863995 2
## 200 200 2.2076150 1.9373723 2
## 201 201 2.9644982 3.3045560 3
## 202 202 3.0767623 3.0423814 3
## 203 203 3.3480945 2.9347146 3
## 204 204 2.9757784 2.6994756 3
## 205 205 3.6237907 3.0982942 3
## 206 206 3.1093692 2.5971876 3
## 207 207 2.9448006 3.0504244 3
## 208 208 3.4877780 2.6188103 3
## 209 209 3.2889513 2.8808945 3
## 210 210 3.3371604 3.6150125 3
## 211 211 2.6532187 3.2721277 3
## 212 212 2.7724586 2.7386915 3
## 213 213 2.6488393 3.0757640 3
## 214 214 2.2355128 3.4279864 3
## 215 215 2.8721251 3.8205650 3
## 216 216 2.5414902 3.3653067 3
## 217 217 3.1064884 2.9693804 3
## 218 218 3.3445566 3.1595375 3
## 219 219 2.6882466 3.0957165 3
## 220 220 3.3605355 2.8162665 3
## 221 221 3.1225768 2.5457724 3
## 222 222 2.9216233 2.8181887 3
## 223 223 3.3885296 2.6978747 3
## 224 224 2.9788114 3.0930984 3
## 225 225 2.9996620 2.9405271 3
## 226 226 2.9231382 3.0038614 3
## 227 227 3.1610709 2.9907439 3
## 228 228 3.1463583 3.1137368 3
## 229 229 2.8712579 3.4172526 3
## 230 230 2.6763562 3.1215626 3
## 231 231 2.7568793 3.3169666 3
## 232 232 2.6210659 3.0198710 3
## 233 233 2.3799133 3.1524617 3
## 234 234 3.1580385 2.9083142 3
## 235 235 3.6636855 3.0234735 3
## 236 236 3.4069018 2.7186596 3
## 237 237 2.8895418 3.2578791 3
## 238 238 2.8983658 2.8773267 3
## 239 239 3.0458218 3.6716847 3
## 240 240 3.1373504 2.9705692 3
## 241 241 2.7603036 3.2741140 3
## 242 242 2.5684575 2.9700122 3
## 243 243 3.1090833 2.9459835 3
## 244 244 2.7971848 2.8170710 3
## 245 245 3.2929566 2.8851342 3
## 246 246 3.1277096 3.1412690 3
## 247 247 2.9642676 2.8230784 3
## 248 248 3.4477851 3.5492766 3
## 249 249 2.9031761 2.9824638 3
## 250 250 3.0623919 3.0857292 3
## 251 251 3.0021596 3.3278480 3
## 252 252 2.9929373 3.3052969 3
## 253 253 3.1603051 2.9051714 3
## 254 254 3.4191972 2.8875159 3
## 255 255 3.0565207 2.8055598 3
## 256 256 3.3351354 2.5403902 3
## 257 257 3.5337701 3.1381065 3
## 258 258 3.0277587 3.1104109 3
## 259 259 3.2624869 3.0819888 3
## 260 260 2.8899357 2.2924429 3
## 261 261 2.8455557 3.4330489 3
## 262 262 2.9410421 2.5220921 3
## 263 263 3.0303561 3.3840035 3
## 264 264 3.1603250 2.6901277 3
## 265 265 3.1808988 3.2473704 3
## 266 266 3.3236162 3.2332316 3
## 267 267 2.9664698 2.8995253 3
## 268 268 2.8603979 2.8253417 3
## 269 269 3.1223213 2.8696162 3
## 270 270 3.0593400 3.1338066 3
## 271 271 2.7386037 3.4270862 3
## 272 272 3.0559020 3.2736268 3
## 273 273 3.3540388 2.8783431 3
## 274 274 2.9712758 2.8914531 3
## 275 275 2.7854939 3.1026714 3
## 276 276 2.8425763 3.0493717 3
## 277 277 2.8866258 2.6415335 3
## 278 278 3.6073911 2.9196142 3
## 279 279 2.7023403 2.7342157 3
## 280 280 3.2500848 3.2142301 3
## 281 281 3.2725901 3.1032279 3
## 282 282 3.3623681 3.1765918 3
## 283 283 3.1812447 3.2693132 3
## 284 284 2.4879224 2.5940344 3
## 285 285 2.3788675 3.1956744 3
## 286 286 3.2537871 3.2684014 3
## 287 287 3.0053145 3.1645367 3
## 288 288 2.7887662 3.2133506 3
## 289 289 2.7860445 3.1687454 3
## 290 290 3.3884620 2.7436036 3
## 291 291 2.8207429 3.2909459 3
## 292 292 2.6650897 3.2522069 3
## 293 293 2.8604777 2.9610793 3
## 294 294 2.6595696 2.8621988 3
## 295 295 3.6919376 3.1869565 3
## 296 296 2.6181407 2.7466759 3
## 297 297 2.7649412 2.9552225 3
## 298 298 3.1684679 3.2133397 3
## 299 299 2.4592127 2.7515784 3
## 300 300 3.3604407 3.3986271 3
## 301 301 3.8150552 3.8911655 4
## 302 302 3.8620399 4.0148583 4
## 303 303 3.9303883 4.4566098 4
## 304 304 3.4494426 3.7811612 4
## 305 305 3.5593164 4.0012256 4
## 306 306 4.6754920 3.9073560 4
## 307 307 4.1275032 3.9260513 4
## 308 308 3.4495542 4.2364470 4
## 309 309 4.1506832 4.6906092 4
## 310 310 4.2831032 4.4027792 4
## 311 311 4.0805069 3.8229632 4
## 312 312 3.7435101 3.9462776 4
## 313 313 3.4707019 3.6563803 4
## 314 314 3.6423924 4.7996343 4
## 315 315 4.3845184 4.2347890 4
## 316 316 4.9294786 4.3220426 4
## 317 317 4.1385435 4.1449025 4
## 318 318 3.8126941 3.6681342 4
## 319 319 3.9630498 3.7205179 4
## 320 320 3.7301834 3.5606677 4
## 321 321 4.4614430 4.4918087 4
## 322 322 3.7391237 4.2120136 4
## 323 323 4.5335820 3.8693868 4
## 324 324 4.0891795 4.2719885 4
## 325 325 3.9966463 3.9687613 4
## 326 326 3.7952514 4.0315092 4
## 327 327 3.7472274 4.1969199 4
## 328 328 4.1297671 4.3557411 4
## 329 329 4.1715826 4.1925438 4
## 330 330 3.6592755 3.7783528 4
## 331 331 3.6837094 4.4049570 4
## 332 332 4.2188747 4.2994978 4
## 333 333 4.1829662 4.7916938 4
## 334 334 4.2188903 4.3751630 4
## 335 335 3.9010871 3.5328454 4
## 336 336 3.3571560 4.0663318 4
## 337 337 4.3217689 3.9261015 4
## 338 338 4.0917867 4.0564099 4
## 339 339 3.6259036 3.7131891 4
## 340 340 3.1774542 3.7296186 4
## 341 341 4.2516770 3.7832621 4
## 342 342 4.3521115 4.0404713 4
## 343 343 4.0647554 3.9041629 4
## 344 344 4.2476680 4.1827443 4
## 345 345 3.5262697 4.5034723 4
## 346 346 4.1090812 4.1956417 4
## 347 347 4.3368593 4.2822037 4
## 348 348 3.8414390 3.7130584 4
## 349 349 3.8273510 4.5809608 4
## 350 350 4.1962328 4.2749378 4
## 351 351 3.8323661 4.0047422 4
## 352 352 4.0595610 4.0291594 4
## 353 353 3.9139591 3.6938850 4
## 354 354 4.0633317 3.5810512 4
## 355 355 4.0211601 3.9291616 4
## 356 356 3.4055907 4.2089805 4
## 357 357 4.0383264 4.0610310 4
## 358 358 4.0023825 3.7142438 4
## 359 359 3.9555421 3.5568174 4
## 360 360 3.8696491 4.1284379 4
## 361 361 3.5307229 4.0540466 4
## 362 362 4.0532103 4.0004110 4
## 363 363 4.4426359 3.7634205 4
## 364 364 3.7439423 4.5551217 4
## 365 365 3.5727560 3.9462284 4
## 366 366 3.7386382 3.9048182 4
## 367 367 3.9806303 4.2083876 4
## 368 368 3.9196637 4.1747687 4
## 369 369 4.0454099 3.6438132 4
## 370 370 4.1605602 3.3823816 4
## 371 371 4.3511813 3.6806420 4
## 372 372 3.8349338 3.7167995 4
## 373 373 4.0369799 4.2736769 4
## 374 374 4.5103537 3.9703504 4
## 375 375 4.1557439 3.9223041 4
## 376 376 4.0623563 3.5519397 4
## 377 377 3.7816710 4.4896374 4
## 378 378 3.5810105 4.3806533 4
## 379 379 4.1300465 3.6097102 4
## 380 380 4.0786997 4.5747733 4
## 381 381 4.3724580 3.7500087 4
## 382 382 4.2635479 3.8884393 4
## 383 383 3.8538995 3.9490199 4
## 384 384 4.2875971 4.3024995 4
## 385 385 3.8019310 3.8387001 4
## 386 386 4.0597019 3.8668607 4
## 387 387 4.3921892 3.9147553 4
## 388 388 4.4878683 3.8997774 4
## 389 389 3.9414044 3.4991334 4
## 390 390 4.2047905 3.6156824 4
## 391 391 3.7232433 4.2947780 4
## 392 392 4.0337634 3.9896309 4
## 393 393 4.2602384 4.0058069 4
## 394 394 4.3666546 4.1811769 4
## 395 395 4.4041752 3.4869136 4
## 396 396 4.5298810 3.9097011 4
## 397 397 4.5461991 4.0606491 4
## 398 398 3.8435015 3.4712584 4
## 399 399 3.7759167 3.8559521 4
## 400 400 3.9367632 3.6361123 4
## 401 401 5.1959132 4.7043474 5
## 402 402 4.9710391 5.2283173 5
## 403 403 5.2799763 5.0112050 5
## 404 404 5.6039455 5.0396016 5
## 405 405 5.5179100 4.5518811 5
## 406 406 4.9051777 5.1580281 5
## 407 407 4.8888652 5.0582977 5
## 408 408 4.8590585 4.8616592 5
## 409 409 4.7994401 4.9630736 5
## 410 410 5.1173934 5.1073318 5
## 411 411 4.7668870 4.5677399 5
## 412 412 5.4565015 4.4270640 5
## 413 413 5.2758851 4.8787421 5
## 414 414 5.6392475 4.6031895 5
## 415 415 4.5688112 5.3011705 5
## 416 416 4.6240364 4.6747033 5
## 417 417 5.0268785 4.4299915 5
## 418 418 4.6862224 5.1837046 5
## 419 419 5.7793721 4.6168303 5
## 420 420 5.0485462 4.9370302 5
## 421 421 4.8721570 5.5486141 5
## 422 422 4.9258209 5.0644702 5
## 423 423 5.1591566 4.9150393 5
## 424 424 5.2794576 4.8490151 5
## 425 425 4.8533669 5.1182890 5
## 426 426 4.6015747 4.9464760 5
## 427 427 5.4087635 5.1468993 5
## 428 428 4.9965817 4.7661751 5
## 429 429 4.9084920 5.3954795 5
## 430 430 4.8831154 5.1394959 5
## 431 431 4.6306043 5.2673779 5
## 432 432 5.5279933 5.0271293 5
## 433 433 5.2391559 4.9783583 5
## 434 434 4.8532624 5.3298015 5
## 435 435 4.5080321 4.7898259 5
## 436 436 4.4412752 4.4000539 5
## 437 437 4.5036700 5.3610643 5
## 438 438 5.1605864 5.0310968 5
## 439 439 5.1678780 5.1732393 5
## 440 440 4.5366550 4.7473369 5
## 441 441 4.7146401 5.0213202 5
## 442 442 4.8810275 5.0811272 5
## 443 443 5.0529390 5.0122737 5
## 444 444 4.8143399 4.9662499 5
## 445 445 4.7348780 5.0355672 5
## 446 446 5.3251666 5.2523205 5
## 447 447 5.4223449 4.5777824 5
## 448 448 4.5660183 5.7456791 5
## 449 449 5.1397081 4.5611519 5
## 450 450 4.9501835 4.9273732 5
## 451 451 4.7763388 4.6618955 5
## 452 452 4.5096301 5.6182603 5
## 453 453 4.9949883 5.4341986 5
## 454 454 5.1945799 4.7561842 5
## 455 455 4.9441766 5.2289558 5
## 456 456 4.7580814 4.6930924 5
## 457 457 4.9361521 4.7071678 5
## 458 458 5.1024525 5.2535712 5
## 459 459 5.0841455 4.9981395 5
## 460 460 4.9661352 4.6498650 5
## 461 461 5.4798362 5.1375566 5
## 462 462 4.5382840 5.2913328 5
## 463 463 5.0506000 4.8869843 5
## 464 464 5.0524429 5.4307025 5
## 465 465 5.4705519 5.0203635 5
## 466 466 5.1896942 4.9896229 5
## 467 467 5.0728852 5.0661373 5
## 468 468 4.6308796 4.6952349 5
## 469 469 5.3343800 4.9137657 5
## 470 470 5.2598079 5.0505460 5
## 471 471 4.3349339 5.4654376 5
## 472 472 4.9278000 4.9821169 5
## 473 473 4.8608547 4.2846235 5
## 474 474 5.2351332 5.0325038 5
## 475 475 5.1462362 4.9174674 5
## 476 476 5.0051903 4.8017315 5
## 477 477 4.8004298 5.0220797 5
## 478 478 4.8450263 4.8948155 5
## 479 479 5.1140468 4.2417863 5
## 480 480 5.4210548 4.8498863 5
## 481 481 5.1201761 4.5085377 5
## 482 482 5.6491915 5.2044117 5
## 483 483 4.9122083 5.0925686 5
## 484 484 4.8756856 4.9103697 5
## 485 485 4.3647099 5.0351246 5
## 486 486 5.2045482 4.7066891 5
## 487 487 5.1872291 5.1087911 5
## 488 488 4.5548826 5.2882998 5
## 489 489 5.2269490 4.8130265 5
## 490 490 4.9194575 5.0309199 5
## 491 491 5.6167773 5.2426163 5
## 492 492 5.2150903 5.0670556 5
## 493 493 4.9006303 4.6887966 5
## 494 494 5.2478889 4.9758907 5
## 495 495 4.6539141 5.4967413 5
## 496 496 4.9108693 4.7573776 5
## 497 497 5.0094066 5.1521305 5
## 498 498 4.9779951 4.7643682 5
## 499 499 5.2422127 5.1636813 5
## 500 500 4.6108840 4.8566559 5
This is what our data looks like:
# Scatter the simulated points, coloring each one by its true cluster label.
ggplot(data, aes(x = V1, y = V2, color = factor(true_clust))) +
  geom_point() +
  scale_color_discrete(name = "true cluster")
Now we can cluster it using kmeans++:
# Keep the id column plus the coordinate columns (names starting with "V"),
# which drops the true_clust label before clustering.
data_for_clust <- select(data, id, starts_with("V"))

# Cluster into 5 groups using the euclidean metric; verbose = TRUE prints the
# seeding and iteration log.
km <- TGL_kmeans_tidy(
  data_for_clust,
  k = 5,
  metric = "euclid",
  verbose = TRUE
)
## id column: id
## KMEans: will generate seeds
## KMeans into generate seeds
## at seed 0
## add new core from 295 to 0
## at seed 1
## done update min distance
## seed range 350 450
## picked up 466 dist was 1.60439
## add new core from 466 to 1
## at seed 2
## done update min distance
## seed range 300 400
## picked up 22 dist was 1.2156
## add new core from 22 to 2
## at seed 3
## done update min distance
## seed range 250 350
## picked up 388 dist was 0.674026
## add new core from 388 to 3
## at seed 4
## done update min distance
## seed range 200 300
## picked up 107 dist was 0.637651
## add new core from 107 to 4
## KMEans: reassign after init
## KMEans: iter 0
## KMEans: iter 1 changed 7
## KMEans: iter 1
## KMEans: iter 2 changed 9
## KMEans: iter 2
## KMEans: iter 3 changed 2
## KMEans: iter 3
## KMEans: iter 4 changed 0
The returned list contains 3 fields:
## [1] "centers" "cluster" "size"
km$centers
contains a tibble with clust
column and the cluster centers:
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 5.01 4.97
## 2 2 1.96 1.99
## 3 3 4.01 4.01
## 4 4 3.02 3.04
## 5 5 0.996 1.07
Clusters are numbered according to reorder_func
(see ‘Custom cluster ordering’ section).
km$cluster
contains tibble with id
column with the observation id (1:n
if no id column was supplied), and clust
column with the observation assigned cluster:
## # A tibble: 500 × 2
## id clust
## <chr> <int>
## 1 1 5
## 2 2 5
## 3 3 5
## 4 4 5
## 5 5 5
## 6 6 5
## 7 7 5
## 8 8 5
## 9 9 5
## 10 10 5
## # ℹ 490 more rows
km$size
contains tibble with clust
column and n
column with the number of points in each cluster:
## # A tibble: 5 × 2
## clust n
## <int> <int>
## 1 1 100
## 2 2 99
## 3 3 99
## 4 4 102
## 5 5 100
We can now check our clustering performance - fraction of observations that were classified correctly (Note that match_clusters
function is internal to the package and is used only in this vignette):
# match_clusters is internal to tglkmeans; its result is used below through
# the true_clust and new_clust columns.
d <- tglkmeans:::match_clusters(data, km, 5)

# Fraction of observations with a non-missing new_clust whose new_clust
# equals true_clust.
n_correct <- sum(d$true_clust == d$new_clust, na.rm = TRUE)
n_assigned <- sum(!is.na(d$new_clust))
n_correct / n_assigned
## [1] 0.992
And plot the results:
# Color encodes the assigned cluster and shape the true cluster; the fitted
# centers are overlaid as large black "X" marks.
ggplot(
  d,
  aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))
) +
  geom_point() +
  scale_color_discrete(name = "cluster") +
  scale_shape_discrete(name = "true cluster") +
  geom_point(data = km$centers, size = 7, color = "black", shape = "X")
By default, the clusters are ordered using the following function: hclust(dist(cor(t(centers))))
- hclust of the euclidean distance of the correlation matrix of the centers.
We can supply our own function to order the clusters using reorder_func
argument. The function is applied to each center and the clusters are ordered by the result.
# Re-run the clustering quietly, passing median as the reorder_func so the
# clusters are ordered by the median of each center.
km <- TGL_kmeans_tidy(
  select(data, id, starts_with("V")),
  k = 5,
  metric = "euclid",
  verbose = FALSE,
  reorder_func = median
)

# Print the centers to show the resulting cluster order.
km$centers
## # A tibble: 5 × 3
## clust V1 V2
## <int> <dbl> <dbl>
## 1 1 0.996 1.07
## 2 2 1.96 1.99
## 3 3 3.02 3.04
## 4 4 4.01 4.01
## 5 5 5.01 4.97
tglkmeans can deal with missing data, as long as at least one dimension is not missing. For example:
## id V1 V2 true_clust
## 1 1 1.0261098 0.8789749 1
## 2 2 1.2082016 1.3878250 1
## 3 3 0.4074118 1.3766459 1
## 4 4 0.6051425 0.6178642 1
## 5 5 0.9524910 1.1564759 1
## 6 6 1.0521462 1.1003523 1
## 7 7 1.0824339 0.9615825 1
## 8 8 1.1844157 1.1233981 1
## 9 9 1.0455999 0.7943182 1
## 10 10 0.7854149 1.0792299 1
## 11 11 1.0210792 1.0148444 1
## 12 12 NA 1.7059383 1
## 13 13 1.1874463 1.3096575 1
## 14 14 0.8660050 1.4647951 1
## 15 15 0.6774450 1.2602245 1
## 16 16 1.1722299 1.2319227 1
## 17 17 0.5210028 1.3175402 1
## 18 18 0.8446220 0.9294415 1
## 19 19 NA 1.3028062 1
## 20 20 1.0987512 1.1828475 1
## 21 21 0.7061653 0.9081029 1
## 22 22 0.8097757 1.5410117 1
## 23 23 1.0355111 1.1280316 1
## 24 24 0.7933902 0.9487525 1
## 25 25 0.9196697 0.5746029 1
## 26 26 1.1158750 0.9110239 1
## 27 27 1.0225545 1.2589772 1
## 28 28 NA 1.0978835 1
## 29 29 NA 0.8262509 1
## 30 30 0.8374893 0.8430044 1
## 31 31 1.2817694 1.0326764 1
## 32 32 0.6429815 1.0066302 1
## 33 33 NA 0.8143470 1
## 34 34 1.1554849 0.7581531 1
## 35 35 0.7382609 0.6445084 1
## 36 36 0.6174064 1.5658486 1
## 37 37 1.2322509 0.9360519 1
## 38 38 1.0664780 0.4662921 1
## 39 39 1.5402956 0.9916549 1
## 40 40 0.9002048 1.1083222 1
## 41 41 0.6686538 0.8906331 1
## 42 42 1.1649300 0.5197539 1
## 43 43 0.4598309 1.3919587 1
## 44 44 1.2098375 0.9882193 1
## 45 45 1.6444289 1.1863258 1
## 46 46 0.9009659 0.8880477 1
## 47 47 1.1907833 0.8702168 1
## 48 48 0.6477844 0.9888838 1
## 49 49 NA 1.4506953 1
## 50 50 0.9209193 1.3881039 1
## 51 51 NA 1.1531862 1
## 52 52 1.0165386 1.5388289 1
## 53 53 1.3660712 0.8130430 1
## 54 54 0.7028710 0.5542610 1
## 55 55 1.3475285 1.6116860 1
## 56 56 1.4838698 0.8363814 1
## 57 57 1.4701719 1.0088474 1
## 58 58 0.8452572 1.0180214 1
## 59 59 1.3778488 1.1640458 1
## 60 60 NA 1.1141623 1
## 61 61 1.5977919 1.1992580 1
## 62 62 1.3806729 0.8147248 1
## 63 63 NA 1.1848853 1
## 64 64 0.8444858 1.0577277 1
## 65 65 0.4775080 1.1730890 1
## 66 66 NA 1.7271554 1
## 67 67 0.8116216 0.9659902 1
## 68 68 0.9547799 1.1695377 1
## 69 69 1.1267423 0.8728154 1
## 70 70 0.7633143 0.9193829 1
## 71 71 1.1200924 1.0618220 1
## 72 72 NA 1.3197330 1
## 73 73 NA 0.8181071 1
## 74 74 0.6103232 1.2971236 1
## 75 75 1.0733139 0.6211208 1
## 76 76 1.3590314 0.7965386 1
## 77 77 1.0956969 0.9418009 1
## 78 78 0.7798725 1.0848282 1
## 79 79 0.8840384 1.2756488 1
## 80 80 0.9639136 1.0888227 1
## 81 81 NA 0.8680878 1
## 82 82 1.3777760 0.5978993 1
## 83 83 NA 1.2930153 1
## 84 84 0.6977858 1.3715870 1
## 85 85 NA 1.3137351 1
## 86 86 0.7315108 1.7112728 1
## 87 87 1.2868578 1.0365225 1
## 88 88 1.2382604 1.1371517 1
## 89 89 0.7734909 0.9699200 1
## 90 90 1.2163963 1.1401624 1
## 91 91 1.5565348 1.0742591 1
## 92 92 0.8407294 1.1583621 1
## 93 93 0.6293467 1.3518135 1
## 94 94 0.9492449 1.1646492 1
## 95 95 1.0176978 0.8171222 1
## 96 96 0.7293823 1.4980478 1
## 97 97 NA 0.9631714 1
## 98 98 1.1203562 0.5722272 1
## 99 99 1.0120280 0.6676874 1
## 100 100 0.8652859 0.9072811 1
## 101 101 2.2507390 1.9734441 2
## 102 102 1.7111898 1.4200256 2
## 103 103 1.9420641 2.1271098 2
## 104 104 1.8473650 2.2720518 2
## 105 105 NA 1.5908843 2
## 106 106 1.7730098 2.5610983 2
## 107 107 1.9314666 2.3425966 2
## 108 108 1.5556423 2.2254545 2
## 109 109 NA 2.0292042 2
## 110 110 2.2823768 2.0903847 2
## 111 111 1.6564114 1.7883869 2
## 112 112 2.0871240 2.0333653 2
## 113 113 2.3165806 1.5842302 2
## 114 114 2.0965648 2.0872073 2
## 115 115 NA 1.8331407 2
## 116 116 2.5872570 2.2380236 2
## 117 117 NA 2.2263639 2
## 118 118 2.0350289 2.1211310 2
## 119 119 NA 2.5712147 2
## 120 120 NA 1.7891205 2
## 121 121 2.0821343 1.8197605 2
## 122 122 2.4191544 2.2517732 2
## 123 123 2.4019949 1.7326654 2
## 124 124 NA 2.5594668 2
## 125 125 1.9408834 2.3052861 2
## 126 126 1.5549761 1.9005603 2
## 127 127 2.1191262 1.6481778 2
## 128 128 NA 1.8092879 2
## 129 129 2.0836692 2.3271355 2
## 130 130 NA 2.2593607 2
## 131 131 1.9074311 2.1591312 2
## 132 132 NA 1.3796532 2
## 133 133 1.8630051 1.9181871 2
## 134 134 2.1546865 2.5561643 2
## 135 135 1.5212970 2.4327141 2
## 136 136 1.6934812 1.5337411 2
## 137 137 2.0074968 1.9250483 2
## 138 138 2.3650280 2.4040551 2
## 139 139 1.9946912 1.8481383 2
## 140 140 2.4641530 2.3764864 2
## 141 141 1.6062787 2.0116725 2
## 142 142 1.9332370 1.9134960 2
## 143 143 NA 2.1019813 2
## 144 144 1.8774210 2.0600012 2
## 145 145 NA 1.6925646 2
## 146 146 NA 1.8960383 2
## 147 147 1.5326075 1.9784592 2
## 148 148 NA 2.0637206 2
## 149 149 1.7302110 1.6931117 2
## 150 150 1.7683858 2.0212679 2
## 151 151 1.6265316 2.2111318 2
## 152 152 1.6412212 1.8536057 2
## 153 153 2.0175564 1.9695532 2
## 154 154 1.4046822 1.7861228 2
## 155 155 1.7118952 1.8946449 2
## 156 156 NA 2.2202097 2
## 157 157 1.9978108 1.9627807 2
## 158 158 1.7467426 2.2380811 2
## 159 159 1.9664537 1.8383967 2
## 160 160 2.4394158 1.9986309 2
## 161 161 2.3159907 2.0334511 2
## 162 162 1.4274191 2.0114730 2
## 163 163 1.7181576 1.8792156 2
## 164 164 1.7026063 1.8163556 2
## 165 165 1.5779771 2.2480382 2
## 166 166 1.9203426 2.3704888 2
## 167 167 2.0069635 1.8451700 2
## 168 168 1.8325267 1.7415076 2
## 169 169 1.3455124 1.7951035 2
## 170 170 1.5983914 1.7675893 2
## 171 171 1.9731544 2.0227249 2
## 172 172 NA 1.8036962 2
## 173 173 1.5473594 1.9714050 2
## 174 174 1.9270898 2.0394617 2
## 175 175 1.9160513 1.8629466 2
## 176 176 1.5318938 2.3317852 2
## 177 177 NA 1.8122467 2
## 178 178 1.8877376 1.8662566 2
## 179 179 1.8301150 1.9615677 2
## 180 180 1.8450436 2.1222798 2
## 181 181 2.2311471 1.8595561 2
## 182 182 2.0098754 2.6510669 2
## 183 183 2.1456931 2.1631415 2
## 184 184 1.9221451 1.8618524 2
## 185 185 NA 2.2418015 2
## 186 186 2.2216200 1.9485285 2
## 187 187 2.0332722 1.4354763 2
## 188 188 1.8062872 2.0424980 2
## 189 189 NA 1.9608621 2
## 190 190 2.2995006 1.4887770 2
## 191 191 1.3853784 1.7359899 2
## 192 192 2.7447421 2.0566132 2
## 193 193 2.0100253 1.7841727 2
## 194 194 2.1998917 1.7847640 2
## 195 195 NA 2.2572201 2
## 196 196 2.1560595 1.5400715 2
## 197 197 2.1952147 1.8842155 2
## 198 198 1.9428259 1.7772452 2
## 199 199 NA 2.1863995 2
## 200 200 2.2076150 1.9373723 2
## 201 201 NA 3.3045560 3
## 202 202 NA 3.0423814 3
## 203 203 3.3480945 2.9347146 3
## 204 204 2.9757784 2.6994756 3
## 205 205 3.6237907 3.0982942 3
## 206 206 3.1093692 2.5971876 3
## 207 207 2.9448006 3.0504244 3
## 208 208 3.4877780 2.6188103 3
## 209 209 3.2889513 2.8808945 3
## 210 210 3.3371604 3.6150125 3
## 211 211 2.6532187 3.2721277 3
## 212 212 2.7724586 2.7386915 3
## 213 213 NA 3.0757640 3
## 214 214 NA 3.4279864 3
## 215 215 2.8721251 3.8205650 3
## 216 216 NA 3.3653067 3
## 217 217 3.1064884 2.9693804 3
## 218 218 3.3445566 3.1595375 3
## 219 219 2.6882466 3.0957165 3
## 220 220 3.3605355 2.8162665 3
## 221 221 NA 2.5457724 3
## 222 222 2.9216233 2.8181887 3
## 223 223 3.3885296 2.6978747 3
## 224 224 NA 3.0930984 3
## 225 225 2.9996620 2.9405271 3
## 226 226 2.9231382 3.0038614 3
## 227 227 3.1610709 2.9907439 3
## 228 228 3.1463583 3.1137368 3
## 229 229 2.8712579 3.4172526 3
## 230 230 2.6763562 3.1215626 3
## 231 231 2.7568793 3.3169666 3
## 232 232 2.6210659 3.0198710 3
## 233 233 NA 3.1524617 3
## 234 234 NA 2.9083142 3
## 235 235 3.6636855 3.0234735 3
## 236 236 3.4069018 2.7186596 3
## 237 237 2.8895418 3.2578791 3
## 238 238 2.8983658 2.8773267 3
## 239 239 3.0458218 3.6716847 3
## 240 240 3.1373504 2.9705692 3
## 241 241 2.7603036 3.2741140 3
## 242 242 2.5684575 2.9700122 3
## 243 243 3.1090833 2.9459835 3
## 244 244 2.7971848 2.8170710 3
## 245 245 3.2929566 2.8851342 3
## 246 246 3.1277096 3.1412690 3
## 247 247 NA 2.8230784 3
## 248 248 NA 3.5492766 3
## 249 249 NA 2.9824638 3
## 250 250 3.0623919 3.0857292 3
## 251 251 NA 3.3278480 3
## 252 252 2.9929373 3.3052969 3
## 253 253 NA 2.9051714 3
## 254 254 3.4191972 2.8875159 3
## 255 255 3.0565207 2.8055598 3
## 256 256 3.3351354 2.5403902 3
## 257 257 NA 3.1381065 3
## 258 258 3.0277587 3.1104109 3
## 259 259 3.2624869 3.0819888 3
## 260 260 2.8899357 2.2924429 3
## 261 261 2.8455557 3.4330489 3
## 262 262 2.9410421 2.5220921 3
## 263 263 3.0303561 3.3840035 3
## 264 264 NA 2.6901277 3
## 265 265 NA 3.2473704 3
## 266 266 3.3236162 3.2332316 3
## 267 267 2.9664698 2.8995253 3
## 268 268 2.8603979 2.8253417 3
## 269 269 3.1223213 2.8696162 3
## 270 270 3.0593400 3.1338066 3
## 271 271 2.7386037 3.4270862 3
## 272 272 3.0559020 3.2736268 3
## 273 273 3.3540388 2.8783431 3
## 274 274 2.9712758 2.8914531 3
## 275 275 2.7854939 3.1026714 3
## 276 276 2.8425763 3.0493717 3
## 277 277 2.8866258 2.6415335 3
## 278 278 3.6073911 2.9196142 3
## 279 279 2.7023403 2.7342157 3
## 280 280 3.2500848 3.2142301 3
## 281 281 3.2725901 3.1032279 3
## 282 282 NA 3.1765918 3
## 283 283 3.1812447 3.2693132 3
## 284 284 NA 2.5940344 3
## 285 285 2.3788675 3.1956744 3
## 286 286 3.2537871 3.2684014 3
## 287 287 3.0053145 3.1645367 3
## 288 288 2.7887662 3.2133506 3
## 289 289 2.7860445 3.1687454 3
## 290 290 3.3884620 2.7436036 3
## 291 291 2.8207429 3.2909459 3
## 292 292 2.6650897 3.2522069 3
## 293 293 2.8604777 2.9610793 3
## 294 294 NA 2.8621988 3
## 295 295 3.6919376 3.1869565 3
## 296 296 2.6181407 2.7466759 3
## 297 297 2.7649412 2.9552225 3
## 298 298 3.1684679 3.2133397 3
## 299 299 2.4592127 2.7515784 3
## 300 300 3.3604407 3.3986271 3
## 301 301 NA 3.8911655 4
## 302 302 NA 4.0148583 4
## 303 303 3.9303883 4.4566098 4
## 304 304 3.4494426 3.7811612 4
## 305 305 3.5593164 4.0012256 4
## 306 306 4.6754920 3.9073560 4
## 307 307 4.1275032 3.9260513 4
## 308 308 NA 4.2364470 4
## 309 309 4.1506832 4.6906092 4
## 310 310 4.2831032 4.4027792 4
## 311 311 4.0805069 3.8229632 4
## 312 312 3.7435101 3.9462776 4
## 313 313 NA 3.6563803 4
## 314 314 3.6423924 4.7996343 4
## 315 315 4.3845184 4.2347890 4
## 316 316 4.9294786 4.3220426 4
## 317 317 4.1385435 4.1449025 4
## 318 318 3.8126941 3.6681342 4
## 319 319 3.9630498 3.7205179 4
## 320 320 NA 3.5606677 4
## 321 321 4.4614430 4.4918087 4
## 322 322 3.7391237 4.2120136 4
## 323 323 NA 3.8693868 4
## 324 324 4.0891795 4.2719885 4
## 325 325 3.9966463 3.9687613 4
## 326 326 3.7952514 4.0315092 4
## 327 327 3.7472274 4.1969199 4
## 328 328 4.1297671 4.3557411 4
## 329 329 4.1715826 4.1925438 4
## 330 330 3.6592755 3.7783528 4
## 331 331 NA 4.4049570 4
## 332 332 NA 4.2994978 4
## 333 333 4.1829662 4.7916938 4
## 334 334 4.2188903 4.3751630 4
## 335 335 3.9010871 3.5328454 4
## 336 336 NA 4.0663318 4
## 337 337 4.3217689 3.9261015 4
## 338 338 4.0917867 4.0564099 4
## 339 339 3.6259036 3.7131891 4
## 340 340 3.1774542 3.7296186 4
## 341 341 4.2516770 3.7832621 4
## 342 342 NA 4.0404713 4
## 343 343 NA 3.9041629 4
## 344 344 4.2476680 4.1827443 4
## 345 345 3.5262697 4.5034723 4
## 346 346 4.1090812 4.1956417 4
## 347 347 NA 4.2822037 4
## 348 348 3.8414390 3.7130584 4
## 349 349 3.8273510 4.5809608 4
## 350 350 4.1962328 4.2749378 4
## 351 351 3.8323661 4.0047422 4
## 352 352 NA 4.0291594 4
## 353 353 3.9139591 3.6938850 4
## 354 354 4.0633317 3.5810512 4
## 355 355 4.0211601 3.9291616 4
## 356 356 3.4055907 4.2089805 4
## 357 357 4.0383264 4.0610310 4
## 358 358 NA 3.7142438 4
## 359 359 3.9555421 3.5568174 4
## 360 360 3.8696491 4.1284379 4
## 361 361 3.5307229 4.0540466 4
## 362 362 NA 4.0004110 4
## 363 363 4.4426359 3.7634205 4
## 364 364 NA 4.5551217 4
## 365 365 NA 3.9462284 4
## 366 366 3.7386382 3.9048182 4
## 367 367 3.9806303 4.2083876 4
## 368 368 3.9196637 4.1747687 4
## 369 369 4.0454099 3.6438132 4
## 370 370 4.1605602 3.3823816 4
## 371 371 4.3511813 3.6806420 4
## 372 372 3.8349338 3.7167995 4
## 373 373 4.0369799 4.2736769 4
## 374 374 NA 3.9703504 4
## 375 375 4.1557439 3.9223041 4
## 376 376 4.0623563 3.5519397 4
## 377 377 3.7816710 4.4896374 4
## 378 378 3.5810105 4.3806533 4
## 379 379 NA 3.6097102 4
## 380 380 4.0786997 4.5747733 4
## 381 381 4.3724580 3.7500087 4
## 382 382 4.2635479 3.8884393 4
## 383 383 NA 3.9490199 4
## 384 384 4.2875971 4.3024995 4
## 385 385 3.8019310 3.8387001 4
## 386 386 4.0597019 3.8668607 4
## 387 387 4.3921892 3.9147553 4
## 388 388 4.4878683 3.8997774 4
## 389 389 NA 3.4991334 4
## 390 390 NA 3.6156824 4
## 391 391 3.7232433 4.2947780 4
## 392 392 NA 3.9896309 4
## 393 393 4.2602384 4.0058069 4
## 394 394 4.3666546 4.1811769 4
## 395 395 4.4041752 3.4869136 4
## 396 396 4.5298810 3.9097011 4
## 397 397 NA 4.0606491 4
## 398 398 3.8435015 3.4712584 4
## 399 399 3.7759167 3.8559521 4
## 400 400 3.9367632 3.6361123 4
## 401 401 5.1959132 4.7043474 5
## 402 402 4.9710391 5.2283173 5
## 403 403 5.2799763 5.0112050 5
## 404 404 5.6039455 5.0396016 5
## 405 405 5.5179100 4.5518811 5
## 406 406 4.9051777 5.1580281 5
## 407 407 4.8888652 5.0582977 5
## 408 408 4.8590585 4.8616592 5
## 409 409 4.7994401 4.9630736 5
## 410 410 5.1173934 5.1073318 5
## 411 411 4.7668870 4.5677399 5
## 412 412 5.4565015 4.4270640 5
## 413 413 5.2758851 4.8787421 5
## 414 414 NA 4.6031895 5
## 415 415 4.5688112 5.3011705 5
## 416 416 4.6240364 4.6747033 5
## 417 417 5.0268785 4.4299915 5
## 418 418 4.6862224 5.1837046 5
## 419 419 5.7793721 4.6168303 5
## 420 420 5.0485462 4.9370302 5
## 421 421 NA 5.5486141 5
## 422 422 4.9258209 5.0644702 5
## 423 423 5.1591566 4.9150393 5
## 424 424 5.2794576 4.8490151 5
## 425 425 4.8533669 5.1182890 5
## 426 426 NA 4.9464760 5
## 427 427 5.4087635 5.1468993 5
## 428 428 4.9965817 4.7661751 5
## 429 429 4.9084920 5.3954795 5
## 430 430 NA 5.1394959 5
## 431 431 4.6306043 5.2673779 5
## 432 432 5.5279933 5.0271293 5
## 433 433 NA 4.9783583 5
## 434 434 NA 5.3298015 5
## 435 435 4.5080321 4.7898259 5
## 436 436 4.4412752 4.4000539 5
## 437 437 4.5036700 5.3610643 5
## 438 438 NA 5.0310968 5
## 439 439 5.1678780 5.1732393 5
## 440 440 4.5366550 4.7473369 5
## 441 441 4.7146401 5.0213202 5
## 442 442 NA 5.0811272 5
## 443 443 NA 5.0122737 5
## 444 444 4.8143399 4.9662499 5
## 445 445 4.7348780 5.0355672 5
## 446 446 5.3251666 5.2523205 5
## 447 447 5.4223449 4.5777824 5
## 448 448 4.5660183 5.7456791 5
## 449 449 5.1397081 4.5611519 5
## 450 450 NA 4.9273732 5
## 451 451 4.7763388 4.6618955 5
## 452 452 NA 5.6182603 5
## 453 453 4.9949883 5.4341986 5
## 454 454 5.1945799 4.7561842 5
## 455 455 4.9441766 5.2289558 5
## 456 456 NA 4.6930924 5
## 457 457 4.9361521 4.7071678 5
## 458 458 5.1024525 5.2535712 5
## 459 459 5.0841455 4.9981395 5
## 460 460 4.9661352 4.6498650 5
## 461 461 NA 5.1375566 5
## 462 462 4.5382840 5.2913328 5
## 463 463 5.0506000 4.8869843 5
## 464 464 5.0524429 5.4307025 5
## 465 465 NA 5.0203635 5
## 466 466 5.1896942 4.9896229 5
## 467 467 5.0728852 5.0661373 5
## 468 468 4.6308796 4.6952349 5
## 469 469 5.3343800 4.9137657 5
## 470 470 5.2598079 5.0505460 5
## 471 471 4.3349339 5.4654376 5
## 472 472 4.9278000 4.9821169 5
## 473 473 4.8608547 4.2846235 5
## 474 474 5.2351332 5.0325038 5
## 475 475 5.1462362 4.9174674 5
## 476 476 NA 4.8017315 5
## 477 477 4.8004298 5.0220797 5
## 478 478 4.8450263 4.8948155 5
## 479 479 5.1140468 4.2417863 5
## 480 480 5.4210548 4.8498863 5
## 481 481 NA 4.5085377 5
## 482 482 5.6491915 5.2044117 5
## 483 483 NA 5.0925686 5
## 484 484 NA 4.9103697 5
## 485 485 4.3647099 5.0351246 5
## 486 486 5.2045482 4.7066891 5
## 487 487 5.1872291 5.1087911 5
## 488 488 4.5548826 5.2882998 5
## 489 489 5.2269490 4.8130265 5
## 490 490 4.9194575 5.0309199 5
## 491 491 5.6167773 5.2426163 5
## 492 492 NA 5.0670556 5
## 493 493 4.9006303 4.6887966 5
## 494 494 5.2478889 4.9758907 5
## 495 495 4.6539141 5.4967413 5
## 496 496 4.9108693 4.7573776 5
## 497 497 5.0094066 5.1521305 5
## 498 498 4.9779951 4.7643682 5
## 499 499 5.2422127 5.1636813 5
## 500 500 4.6108840 4.8566559 5
# Cluster the `id` and `V*` columns of `data` into k = 5 groups using the
# Euclidean metric, with progress output turned off.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 5,
metric = "euclid",
verbose = FALSE
)
# `match_clusters` is an internal helper (hence `:::`); its result carries both
# `true_clust` and `new_clust` columns, which are compared below.
# NOTE(review): presumably it relabels the fitted clusters so they line up with
# the simulated labels -- confirm against the package source.
d <- tglkmeans:::match_clusters(data, km, 5)
# Fraction of rows with a non-missing `new_clust` whose value equals `true_clust`.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.978
Plotting the results (without the NAs), we get:
# Scatter plot of V1 against V2: point color encodes the assigned cluster
# (`new_clust`) and point shape encodes the simulated cluster (`true_clust`).
d %>% ggplot(aes(x = V1, y = V2, color = factor(new_clust), shape = factor(true_clust))) +
geom_point() +
scale_color_discrete(name = "cluster") +
scale_shape_discrete(name = "true cluster") +
# Overlay the rows of `km$centers` as large black "X" marks.
geom_point(data = km$centers, size = 7, color = "black", shape = "X")
## Warning: Removed 100 rows containing missing values (`geom_point()`).
Let’s move to higher dimensions (and higher noise):
# Simulate a larger problem: 30 clusters in 300 dimensions with sd = 0.3.
# NOTE(review): `n = 100` is presumably the number of points per cluster -- confirm
# against `simulate_data`.
data <- simulate_data(n = 100, sd = 0.3, nclust = 30, dims = 300)
# Same call as for the 5-cluster example, now with k = 30.
km <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 30,
metric = "euclid",
verbose = FALSE
)
d <- tglkmeans:::match_clusters(data, km, 30)
# Fraction of rows with a non-missing `new_clust` whose value equals `true_clust`.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 1
Let’s compare it to R vanilla kmeans:
# Baseline: base R `kmeans()` on the feature columns only (no `id`), with 30 centers.
km_standard <- kmeans(data %>% select(starts_with("V")), centers = 30)
# `kmeans()` returns one cluster label per input row, in row order, so pair the
# labels with the ids stored in `data` rather than assuming the ids are 1..nrow(data).
# The `clust` element is added so the result can be passed to `match_clusters`
# the same way the TGL_kmeans_tidy result is.
km_standard$clust <- tibble(id = data$id, clust = km_standard$cluster)
d <- tglkmeans:::match_clusters(data, km_standard, 30)
# Fraction of rows with a non-missing `new_clust` whose value equals `true_clust`.
sum(d$true_clust == d$new_clust, na.rm = TRUE) / sum(!is.na(d$new_clust))
## [1] 0.75
We can see that kmeans++ clusters significantly better than R vanilla kmeans.
We can set the seed for the C++ random number generator to get reproducible results:
# Run the same clustering twice with an identical `seed` ...
km1 <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 30,
metric = "euclid",
verbose = FALSE,
seed = 60427
)
km2 <- TGL_kmeans_tidy(data %>% select(id, starts_with("V")),
k = 30,
metric = "euclid",
verbose = FALSE,
seed = 60427
)
# ... and check that the centers agree exactly. `[, -1]` drops the first column
# of `centers` before comparing.
# NOTE(review): the first column is presumably the cluster label -- confirm.
all(km1$centers[, -1] == km2$centers[, -1])
## [1] TRUE