Check-in [e0308f0c75]
Not logged in

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:add tcllib upstream changes
Timelines: family | ancestors | descendants | both | trunk
Files: files | file ages | folders
SHA1: e0308f0c7530552060638744db4763ef5970bdda
User & Date: chw 2019-11-10 18:08:18.332
Context
2019-11-10
19:02
add yeti to undroidwish build scripts check-in: adbe1957ab user: chw tags: trunk
18:08
add tcllib upstream changes check-in: e0308f0c75 user: chw tags: trunk
17:59
import yeti 0.4.2 check-in: b2a387058e user: chw tags: trunk
Changes
Unified Diff Ignore Whitespace Patch
assets/tcllib1.19/math/interpolate.tcl became a regular file.
Changes to assets/tcllib1.19/math/pdf_stat.tcl.
20
21
22
23
24
25
26

27
28

29
30

31
32

33
34
35
36
37
38
39
	    cdf-exponential cdf-triangular cdf-symmetric-triangular \
	    cdf-students-t \
	    random-normal random-uniform random-lognormal \
	    random-exponential random-triangular \
	    histogram-uniform \
	    pdf-gamma pdf-poisson pdf-chisquare pdf-students-t pdf-beta \
	    pdf-weibull pdf-gumbel pdf-pareto pdf-cauchy \

	    cdf-gamma cdf-poisson cdf-chisquare cdf-beta cdf-F \
	    cdf-weibull cdf-gumbel cdf-pareto cdf-cauchy \

	    random-gamma random-poisson random-chisquare random-students-t random-beta \
	    random-weibull random-gumbel random-pareto random-cauchy \

	    incompleteGamma incompleteBeta \
	    estimate-pareto empirical-distribution bootstrap estimate-exponential


    variable cdf_normal_prob     {}
    variable cdf_normal_x        {}
    variable cdf_toms322_cached  {}
    variable initialised_cdf     0
    variable twopi               [expr {2.0*acos(-1.0)}]
    variable pi                  [expr {acos(-1.0)}]







>


>


>

|
>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
	    cdf-exponential cdf-triangular cdf-symmetric-triangular \
	    cdf-students-t \
	    random-normal random-uniform random-lognormal \
	    random-exponential random-triangular \
	    histogram-uniform \
	    pdf-gamma pdf-poisson pdf-chisquare pdf-students-t pdf-beta \
	    pdf-weibull pdf-gumbel pdf-pareto pdf-cauchy \
	    pdf-laplace pdf-kumaraswamy pdf-negative-binomial \
	    cdf-gamma cdf-poisson cdf-chisquare cdf-beta cdf-F \
	    cdf-weibull cdf-gumbel cdf-pareto cdf-cauchy \
	    cdf-laplace cdf-kumaraswamy cdf-negative-binomial \
	    random-gamma random-poisson random-chisquare random-students-t random-beta \
	    random-weibull random-gumbel random-pareto random-cauchy \
	    random-laplace random-kumaraswamy random-negative-binomial \
	    incompleteGamma incompleteBeta \
	    estimate-pareto empirical-distribution bootstrap estimate-exponential \
	    estimate-laplace estimate-negative-binomial

    variable cdf_normal_prob     {}
    variable cdf_normal_x        {}
    variable cdf_toms322_cached  {}
    variable initialised_cdf     0
    variable twopi               [expr {2.0*acos(-1.0)}]
    variable pi                  [expr {acos(-1.0)}]
217
218
219
220
221
222
223






















































































224
225
226
227
228
229
230
    }

    if { $x < 0.0 } { return 0.0 }
    if { $x > 700.0*$mean } { return 0.0 }

    set prob [expr {exp(-$x/double($mean))/$mean}]























































































    return $prob
}


# cdf-normal --
#    Return the cumulative probability belonging to a normal distribution
#







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
    }

    if { $x < 0.0 } { return 0.0 }
    if { $x > 700.0*$mean } { return 0.0 }

    set prob [expr {exp(-$x/double($mean))/$mean}]

    return $prob
}


# pdf-laplace --
#    Return the probabilities belonging to a Laplace
#    distribution
#
# Arguments:
#    mean     Mean of the distribution
#    scale    Scale (the spreading) of the distribution
#    x        Value for which the probability must be determined
#
# Result:
#    Probability of value x under the given distribution
#
proc ::math::statistics::pdf-laplace { mean scale x } {
    variable NEGSTDEV
    variable OUTOFRANGE

    if { $scale <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: scale must be positive"
    }

    set prob [expr {exp(-($x-$mean)/double($scale))/(2.0*$scale)}]

    return $prob
}


# pdf-kumaraswamy --
#    Return the probabilities belonging to a Kumaraswamy
#    distribution (akin to the Beta distribution, but tractable)
#
#    Arguments:
#    a         First parameter of the Kumaraswamy distribution
#    b         Second parameter of the Kumaraswamy distribution
#    x         Value of variate
#
# Result:
#    Probability of value x under the given distribution
#
proc ::math::statistics::pdf-kumaraswamy { a b x } {
    variable OUTOFRANGE

    if { $a <= 0.0 || $b <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameters a and b must be positive"
    }

    set prob [expr {$a * $b * $x**($a-1) * (1.0 -$x**$a)**($b-1)}]

    return $prob
}


# pdf-negative-binomial --
#    Return the probability belonging to a negative binomial
#    distribution
#
#    Arguments:
#    r         Allowed number of failures for the distribution
#    p         Probability of success for the negative bionomial distribution
#    k         Value of variate (integer)
#
# Result:
#    Probability of k successes under the given distribution
#
proc ::math::statistics::pdf-negative-binomial { r p k } {
    variable OUTOFRANGE
    variable INVALID

    if { $p < 0.0 || $p >= 1.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameter p must be non-negative and lower than 1"
    }

    if { int($r) != $r || $r < 1 } {
	return -code error -errorcode ARG -errorinfo $INVALIDE \
		"$INVALID: parameter r must be a positive integer"
    }

    set coeff [::math::choose [expr {$k+$r-1}] $k]
    set prob  [expr {$coeff * (1.0 - $p)**$r * $p ** $k}]

    return $prob
}


# cdf-normal --
#    Return the cumulative probability belonging to a normal distribution
#
453
454
455
456
457
458
459





























































































460
461
462
463
464
465
466
    if { $x > 30.0*$mean } { return 1.0 }

    set prob [expr {1.0-exp(-$x/double($mean))}]

    return $prob
}































































































# Inverse-cdf-uniform --
#    Return the argument belonging to the cumulative probability
#    for a uniform distribution (parameters as minimum/maximum)
#
# Arguments:
#    pmin      Minimum of the distribution







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
    if { $x > 30.0*$mean } { return 1.0 }

    set prob [expr {1.0-exp(-$x/double($mean))}]

    return $prob
}


# cdf-laplace --
#    Return the cumulative probabilities belonging to a Laplace
#    distribution
#
# Arguments:
#    mean     Mean of the distribution
#    scale    Scale (the spreading) of the distribution
#    x        Value for which the probability must be determined
#
# Result:
#    Cumulative probability of value x under the given distribution
#
proc ::math::statistics::cdf-laplace { mean scale x } {
    variable NEGSTDEV
    variable OUTOFRANGE

    if { $scale <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: scale must be positive"
    }

    if { $x < $mean } {
        set prob [expr {0.5 * exp(($x-$mean)/double($scale))}]
    } else {
        set prob [expr {1.0 - 0.5 * exp(($mean-$x)/double($scale))}]
    }

    return $prob
}


# cdf-kumaraswamy --
#    Return the cumulative probabilities belonging to a Kumaraswamy
#    distribution (akin to the Beta distribution, but tractable)
#
#    Arguments:
#    a         First parameter of the Kumaraswamy distribution
#    b         Second parameter of the Kumaraswamy distribution
#    x         Value of variate
#
# Result:
#    Cumulative probability of value x under the given distribution
#
proc ::math::statistics::cdf-kumaraswamy { a b x } {
    variable OUTOFRANGE

    if { $a <= 0.0 || $b <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameters a and b must be positive"
    }

    set prob [expr {1.0 - (1.0-$x**$a) ** $b}]

    return $prob
}


# cdf-negative-binomial --
#    Return the cumulative probability for a negative binomial distribution
#
#    Arguments:
#    r         Allowed number of failures for the distribution
#    p         Probability of success for the negative bionomial distribution
#    k         Value of variate (integer)
#
# Result:
#    Cumulative probability for up to k successes under the given distribution
#
proc ::math::statistics::cdf-negative-binomial { r p k } {
    variable OUTOFRANGE
    variable INVALID

    if { $p < 0.0 || $p >= 1.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameter p must be non-negative and lower than 1"
    }

    if { int($r) != $r || $r < 1 } {
	return -code error -errorcode ARG -errorinfo $INVALIDE \
		"$INVALID: parameter r must be a positive integer"
    }

    set sum 0.0

    for { set i 0 } { $i <= $k } { incr i } {
        set prob [pdf-negative-binomial $r $p $i]
        set sum  [expr {$sum + $prob}]
    }

    return $sum
}


# Inverse-cdf-uniform --
#    Return the argument belonging to the cumulative probability
#    for a uniform distribution (parameters as minimum/maximum)
#
# Arguments:
#    pmin      Minimum of the distribution
2121
2122
2123
2124
2125
2126
2127


















































































































2128
2129
2130
2131
2132
2133
2134
    set retval {}
    for {set i 0} {$i < $number} {incr i} {
        lappend retval [expr {$location + $scale * tan( $pi * (rand() - 0.5))}]
    }
    return $retval
}




















































































































# estimate-pareto --
#    Estimate the parameters of a Pareto distribution
#
# Arguments:
#    values    Values that are supposed to be distributed according to Pareto
#







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2304
2305
2306
2307
2308
2309
2310
2311
2312
2313
2314
2315
2316
2317
2318
2319
2320
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341
2342
2343
2344
2345
2346
2347
2348
2349
2350
2351
2352
2353
2354
2355
2356
2357
2358
2359
2360
2361
2362
2363
2364
2365
2366
2367
2368
2369
2370
2371
2372
2373
2374
2375
2376
2377
2378
2379
2380
2381
2382
2383
2384
2385
2386
2387
2388
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402
2403
2404
2405
2406
2407
2408
2409
2410
2411
2412
2413
2414
2415
2416
2417
2418
2419
2420
2421
2422
2423
2424
2425
2426
2427
2428
2429
2430
2431
    set retval {}
    for {set i 0} {$i < $number} {incr i} {
        lappend retval [expr {$location + $scale * tan( $pi * (rand() - 0.5))}]
    }
    return $retval
}


# random-laplace --
#    Generate a list of Laplace distributed deviates
#
# Arguments:
#    mean     Mean of the distribution
#    scale    Scale (the spreading) of the distribution
#    number   Number of values to return
#
# Result:
#    List of random numbers
#
proc ::math::statistics::random-laplace { mean scale number } {
    variable NEGSTDEV
    variable OUTOFRANGE

    if { $scale <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: scale must be positive"
    }

    set retval {}
    for {set i 0} {$i < $number} {incr i} {
        set p [expr {rand()}]
        if { $p < 0.5 } {
            set x [expr {$mean + $scale * log(1.0 - 2.0*abs($p-0.5))}]
        } else {
            set x [expr {$mean - $scale * log(1.0 - 2.0*abs($p-0.5))}]
        }
        lappend retval $x
    }

    return $retval
}


# random-kumaraswamy --
#    Generate a list of Kumaraswamy distributed deviates
#
#    Arguments:
#    a         First parameter of the Kumaraswamy distribution
#    b         Second parameter of the Kumaraswamy distribution
#    number    Number of values to return
#
# Result:
#    List of random numbers
#
proc ::math::statistics::random-kumaraswamy { a b number } {
    variable OUTOFRANGE

    if { $a <= 0.0 || $b <= 0.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameters a and b must be positive"
    }

    set ra [expr {1.0 / $a}]
    set rb [expr {1.0 / $b}]

    set retval {}
    for {set i 0} {$i < $number} {incr i} {
        set p [expr {rand()}]
        set x [expr {( 1.0 - (1.0-$p) ** $rb ) ** $ra}]

        lappend retval $x
    }

    return $retval
}


# random-negative-binomial --
#    Generate a list of deviates according to the negative binomial distribution
#
#    Arguments:
#    r         Allowed number of failures for the distribution
#    p         Probability of success for the negative bionomial distribution
#    number    Number of values to return
#
# Result:
#    List of random numbers
#
proc ::math::statistics::random-negative-binomial { r p number } {
    variable OUTOFRANGE
    variable INVALID

    if { $p < 0.0 || $p >= 1.0 } {
	return -code error -errorcode ARG -errorinfo $OUTOFRANGE \
		"$OUTOFRANGE: parameter p must be non-negative and lower than 1"
    }

    if { int($r) != $r }} $r < 1} {
	return -code error -errorcode ARG -errorinfo $INVALIDE \
		"$INVALID: parameter r must be a positive integer"
    }

    set retval {}
    for {set i 0} {$i < $number} {incr i} {
        set success 0
        set failure 0

        while { $failure < $r } {
            if { rand() <= $p } {
                incr success
            } else {
                incr failure
            }
        }

        lappend retval $success
    }

    return $retval
}


# estimate-pareto --
#    Estimate the parameters of a Pareto distribution
#
# Arguments:
#    values    Values that are supposed to be distributed according to Pareto
#
2201
2202
2203
2204
2205
2206
2207
































































2208
2209
2210
2211
2212
2213
2214
    }

    set parameter [expr {$sum/double($count)}]
    set stdev     [expr {$parameter / sqrt($count)}]

    return [list $parameter $stdev]
}

































































# empirical-distribution --
#    Determine the empirical distribution
#
# Arguments:
#    values    Values that are to be examined
#







>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>







2498
2499
2500
2501
2502
2503
2504
2505
2506
2507
2508
2509
2510
2511
2512
2513
2514
2515
2516
2517
2518
2519
2520
2521
2522
2523
2524
2525
2526
2527
2528
2529
2530
2531
2532
2533
2534
2535
2536
2537
2538
2539
2540
2541
2542
2543
2544
2545
2546
2547
2548
2549
2550
2551
2552
2553
2554
2555
2556
2557
2558
2559
2560
2561
2562
2563
2564
2565
2566
2567
2568
2569
2570
2571
2572
2573
2574
2575
    }

    set parameter [expr {$sum/double($count)}]
    set stdev     [expr {$parameter / sqrt($count)}]

    return [list $parameter $stdev]
}


# estimate-laplace --
#    Estimate the parameter of a Laplace distribution
#
# Arguments:
#    values    Values that are supposed to be Laplace distributed
#
# Result:
#    Estimates of respectively the mean and the scale of the Laplace distribution
#    (See https://en.wikipedia.org/wiki/Laplace_distribution)
#
# Note:
#    According to Wikipedia the estimators are maximum-likelihood estimators
#
proc ::math::statistics::estimate-laplace { values } {

    set mean [median $values]

    set sum   0.0
    set count 0

    foreach v $values {
        if { $v != "" } {
            set  sum [expr {$sum + abs($v-$mean)}]
            incr count
        }
    }

    set scale [expr {$sum/double($count)}]

    return [list $mean $scale]
}


# estimate-negative-binomial --
#    Estimate the parameter p of a negative binomial distribution,
#    given the allowed number of failures
#
# Arguments:
#    r         Allowed number of failures
#    values    Values that are supposed to be distributed according to a negative binomial distribution
#
# Result:
#    Estimate of the probability of success for the distribution
#
# Note:
#    According to Wikipedia the estimators are maximum-likelihood estimators
#
proc ::math::statistics::estimate-negative-binomial { r values } {

    set sum   0.0
    set count 0

    foreach v $values {
        if { $v != "" } {
            set  sum [expr {$sum + $v}]
            incr count
        }
    }

    return [expr {$sum/double($count * $r + $sum)}]
}


# empirical-distribution --
#    Determine the empirical distribution
#
# Arguments:
#    values    Values that are to be examined
#
2293
2294
2295
2296
2297
2298
2299
2300
2301
2302
2303
2304
2305
2306
2307
    foreach z    {4.417 3.891 3.291 2.576 2.241 1.960 1.645 1.150 0.674
    0.319 0.126 0.063 0.0125} \
	    pexp {1.e-5 1.e-4 1.e-3 1.e-2 0.025 0.050 0.100 0.250 0.500
    0.750 0.900 0.950 0.990 } {
	set prob [::math::statistics::Cdf-toms322 1 5000 [expr {$z*$z}]]
	puts "$z - $pexp - [expr {1.0-$prob}]"
    }

    puts "Normal distribution (inverted; one-tailed)"
    foreach p {0.001 0.01 0.1 0.25 0.5 0.75 0.9 0.99 0.999} {
	puts "$p - [::math::statistics::Inverse-cdf-normal 0.0 1.0 $p]"
    }
    puts "Normal random variables"
    set rndvars [::math::statistics::random-normal 1.0 2.0 20]
    puts $rndvars







<







2654
2655
2656
2657
2658
2659
2660

2661
2662
2663
2664
2665
2666
2667
    foreach z    {4.417 3.891 3.291 2.576 2.241 1.960 1.645 1.150 0.674
    0.319 0.126 0.063 0.0125} \
	    pexp {1.e-5 1.e-4 1.e-3 1.e-2 0.025 0.050 0.100 0.250 0.500
    0.750 0.900 0.950 0.990 } {
	set prob [::math::statistics::Cdf-toms322 1 5000 [expr {$z*$z}]]
	puts "$z - $pexp - [expr {1.0-$prob}]"
    }

    puts "Normal distribution (inverted; one-tailed)"
    foreach p {0.001 0.01 0.1 0.25 0.5 0.75 0.9 0.99 0.999} {
	puts "$p - [::math::statistics::Inverse-cdf-normal 0.0 1.0 $p]"
    }
    puts "Normal random variables"
    set rndvars [::math::statistics::random-normal 1.0 2.0 20]
    puts $rndvars
Changes to assets/tcllib1.19/math/pkgIndex.tcl.
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package ifneeded math::bigfloat          1.2.2 [list source [file join $dir bigfloat.tcl]]
package ifneeded math::machineparameters 0.1   [list source [file join $dir machineparameters.tcl]]

if {![package vsatisfies [package provide Tcl] 8.5]} {return}
package ifneeded math::calculus          0.8.1 [list source [file join $dir calculus.tcl]]
# statistics depends on linearalgebra (for multi-variate linear regression).
# statistics depends on optimize (for logistic regression).
package ifneeded math::statistics        1.3.1 [list source [file join $dir statistics.tcl]]
package ifneeded math::linearalgebra     1.1.6 [list source [file join $dir linalg.tcl]]
package ifneeded math::calculus::symdiff 1.0.1 [list source [file join $dir symdiff.tcl]]
package ifneeded math::bigfloat          2.0.2 [list source [file join $dir bigfloat2.tcl]]
package ifneeded math::numtheory         1.1.1 [list source [file join $dir numtheory.tcl]]
package ifneeded math::decimal           1.0.3 [list source [file join $dir decimal.tcl]]
package ifneeded math::geometry          1.3.1 [list source [file join $dir geometry.tcl]]
package ifneeded math::trig              1.0   [list source [file join $dir trig.tcl]]







|







17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
package ifneeded math::bigfloat          1.2.2 [list source [file join $dir bigfloat.tcl]]
package ifneeded math::machineparameters 0.1   [list source [file join $dir machineparameters.tcl]]

if {![package vsatisfies [package provide Tcl] 8.5]} {return}
package ifneeded math::calculus          0.8.1 [list source [file join $dir calculus.tcl]]
# statistics depends on linearalgebra (for multi-variate linear regression).
# statistics depends on optimize (for logistic regression).
package ifneeded math::statistics        1.5.0 [list source [file join $dir statistics.tcl]]
package ifneeded math::linearalgebra     1.1.6 [list source [file join $dir linalg.tcl]]
package ifneeded math::calculus::symdiff 1.0.1 [list source [file join $dir symdiff.tcl]]
package ifneeded math::bigfloat          2.0.2 [list source [file join $dir bigfloat2.tcl]]
package ifneeded math::numtheory         1.1.1 [list source [file join $dir numtheory.tcl]]
package ifneeded math::decimal           1.0.3 [list source [file join $dir decimal.tcl]]
package ifneeded math::geometry          1.3.1 [list source [file join $dir geometry.tcl]]
package ifneeded math::trig              1.0   [list source [file join $dir trig.tcl]]
Changes to assets/tcllib1.19/math/statistics.tcl.
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# version 0.9.3: added histogram-alt, corrected test-normal
# version 1.0:   added test-anova-F
# version 1.0.1: correction in pdf-lognormal and cdf-lognormal
# version 1.1:   added test-Tukey-range and test-Dunnett
# version 1.3:   added wasserstein-distance, kl-divergence and logit regression

package require Tcl 8.5 ; # 8.5+ feature in test-anova-F and others: **-operator
package provide math::statistics 1.3.1
package require math

if {![llength [info commands ::lrepeat]]} {
    # Forward portability, emulate lrepeat
    proc ::lrepeat {n args} {
	if {$n < 1} {
	    return -code error "must have a count of at least 1"







|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# version 0.9.3: added histogram-alt, corrected test-normal
# version 1.0:   added test-anova-F
# version 1.0.1: correction in pdf-lognormal and cdf-lognormal
# version 1.1:   added test-Tukey-range and test-Dunnett
# version 1.3:   added wasserstein-distance, kl-divergence and logit regression

package require Tcl 8.5 ; # 8.5+ feature in test-anova-F and others: **-operator
package provide math::statistics 1.5.0
package require math

if {![llength [info commands ::lrepeat]]} {
    # Forward portability, emulate lrepeat
    proc ::lrepeat {n args} {
	if {$n < 1} {
	    return -code error "must have a count of at least 1"
61
62
63
64
65
66
67

68
69
70
71
72
73
74
	    test-Duckworth test-anova-F test-Tukey-range test-Dunnett
    #
    # Error messages
    #
    variable NEGSTDEV   {Zero or negative standard deviation}
    variable TOOFEWDATA {Too few or invalid data}
    variable OUTOFRANGE {Argument out of range}


    #
    # Coefficients involved
    #
    variable factorNormalPdf
    set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}]








>







61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
	    test-Duckworth test-anova-F test-Tukey-range test-Dunnett
    #
    # Error messages
    #
    variable NEGSTDEV   {Zero or negative standard deviation}
    variable TOOFEWDATA {Too few or invalid data}
    variable OUTOFRANGE {Argument out of range}
    variable INVALID    {Argument invalid}

    #
    # Coefficients involved
    #
    variable factorNormalPdf
    set factorNormalPdf [expr {sqrt(8.0*atan(1.0))}]

Changes to assets/tcllib1.19/math/wilcoxon.tcl.
1
2
3


4
5
6
7
8
9
10
# statistics_new.tcl --
#     Implementation of the Wilcoxon test: test if the medians
#     of two samples are the same


#

# test-Wilcoxon
#     Compute the statistic that indicates if the medians of two
#     samples are the same
#
# Arguments:
|


>
>







1
2
3
4
5
6
7
8
9
10
11
12
# wilcoxon.tcl --
#     Implementation of the Wilcoxon test: test if the medians
#     of two samples are the same
#
#     Also: Levene's and Brown-Forsythe's test
#

# test-Wilcoxon
#     Compute the statistic that indicates if the medians of two
#     samples are the same
#
# Arguments:
222
223
224
225
226
227
228












































































































#
# Result:
#     Rank correlation coefficient
#
proc ::math::statistics::spearman-rank {sample_a sample_b} {
    return [lindex [spearman-rank-extended $sample_a $sample_b] 0]
}



















































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
#
# Result:
#     Rank correlation coefficient
#
proc ::math::statistics::spearman-rank {sample_a sample_b} {
    return [lindex [spearman-rank-extended $sample_a $sample_b] 0]
}

# test-Levene --
#     Compute the Levene statistic that indicates if the variances of
#     groups of data are the same
#
# Arguments:
#     groups         List of groups of values to be examined
#
# Result:
#     Statistic for the test (an F statistic with k-1, N-k degrees
#     of freedom - k the number of groups and N the total number
#     of values)
#     The test uses the mean of the values in the groups.
#
proc ::math::statistics::test-Levene {groups} {

    return [Test-Levene-Brown-Forsythe 0 $groups]
}

# test-Brown-Forsythe --
#     Compute the Brown-Forsythe statistic that indicates if the variances of
#     groups of data are the same
#
# Arguments:
#     groups         List of groups of values to be examined
#
# Result:
#     Statistic for the test (an F statistic with k-1, N-k degrees
#     of freedom - k the number of groups and N the total number
#     of values)
#     The test uses the median of the values in the groups.
#
proc ::math::statistics::test-Brown-Forsythe {groups} {

    return [Test-Levene-Brown-Forsythe 1 $groups]
}

# Test-Levene-Brown-Forsythe --
#     Compute either the Levene or the Brown-Forsythe statistic that indicates
#     if the variances of groups of data are the same
#
# Arguments:
#     choice         Which of the two versions
#     groups         List of groups of values to be examined
#
# Result:
#     Statistic for the test
#     The test uses either the mean or the median of the values in the groups.
#
proc ::math::statistics::Test-Levene-Brown-Forsythe {choice groups} {

    #
    # Compute the deviations from the mean/median within each group
    #
    set alldevs {}
    set zscores {}
    set zmeans  {}
    foreach group $groups {
        if { $choice } {
            set zm [median $group]
        } else {
            set zm [mean $group]
        }
        set zgroup {}
        foreach element $group {
            lappend zgroup [expr {abs($element-$zm)}]
        }

        set alldevs [concat $alldevs $zgroup]
        lappend zscores $zgroup
        lappend zmeans  [mean $zgroup]
    }

    set zoverall [mean $alldevs]

    set ndata   [llength $alldevs]
    set ngroups [llength $groups]

    #
    # Compute the numerator of the statistic
    #
    set sumsqmeans 0.0

    foreach zm $zmeans group $groups {
        set n          [llength $group]
        set sumsqmeans [expr { $sumsqmeans + $n * ($zm - $zoverall)**2 }]
    }

    #
    # Compute the denominator
    #
    set sumsqpergroup 0.0

    foreach zm $zmeans zs $zscores {
        set sumsq 0.0
        foreach z $zs {
            set sumsq [expr {$sumsq + ($z-$zm)**2}]
        }

        set sumsqpergroup [expr { $sumsqpergroup + $sumsq }]
    }

    #
    # Finally, the statistic
    #

    return [expr { ($ndata-$ngroups) * $sumsqmeans / double( ($ngroups-1) * $sumsqpergroup ) }]
}