:: Probability on Finite and Discrete Set and Uniform Distribution :: by Hiroyuki Okazaki :: :: Received May 5, 2009 :: Copyright (c) 2009-2021 Association of Mizar Users :: (Stowarzyszenie Uzytkownikow Mizara, Bialystok, Poland). :: This code can be distributed under the GNU General Public Licence :: version 3.0 or later, or the Creative Commons Attribution-ShareAlike :: License version 3.0 or later, subject to the binding interpretation :: detailed in file COPYING.interpretation. :: See COPYING.GPL and COPYING.CC-BY-SA for the full text of these :: licenses, or see http://www.gnu.org/licenses/gpl.html and :: http://creativecommons.org/licenses/by-sa/3.0/. environ vocabularies NUMBERS, XBOOLE_0, FINSET_1, FINSEQ_1, RELAT_1, TARSKI, RPR_1, NAT_1, CARD_1, SUBSET_1, FUNCT_1, REAL_1, ARYTM_3, UPROOTS, SETFAM_1, MATRPROB, VALUED_1, CARD_3, PROB_2, XXREAL_0, ORDINAL4, PARTFUN1, DIST_1; notations TARSKI, XBOOLE_0, SUBSET_1, FINSET_1, CARD_1, CARD_3, ORDINAL1, NUMBERS, XCMPLX_0, XXREAL_0, XREAL_0, RELAT_1, FUNCT_1, RELSET_1, PARTFUN1, FINSEQ_1, FINSEQ_2, VALUED_1, FUNCT_2, SETFAM_1, RPR_1, PROB_2, RVSUM_1, UPROOTS, MATRPROB; constructors RPR_1, RVSUM_1, BINOP_2, PARTFUN3, PROB_2, UPROOTS, MATRPROB, RELSET_1; registrations FUNCT_1, SUBSET_1, NAT_1, XREAL_0, ORDINAL1, FINSEQ_1, RELAT_1, FINSET_1, NUMBERS, VALUED_0, CARD_1, MATRPROB, ENTROPY1; requirements NUMERALS, REAL, BOOLE, SUBSET, ARITHM; begin :: Probability on Finite and Discrete Set notation let S be set, s be FinSequence of S; synonym whole_event (s) for dom s; end; notation let S be set, s be FinSequence of S, x be set; synonym event_pick (x,s) for Coim(s,x); end; definition let S be set, s be FinSequence of S, x be set; redefine func event_pick (x,s) -> Event of whole_event(s); end; definition let S be finite set, s be FinSequence of S, x be set; func frequency(x,s) -> Nat equals :: DIST_1:def 1 card (event_pick(x,s)); end; ::$CT theorem :: DIST_1:2 for S be set, s be FinSequence of S, e be set st e in whole_event(s) ex x be Element of S st e in event_pick(x,s); definition let S be finite set, s be FinSequence of S, x be set; func FDprobability (x,s) -> Real equals :: DIST_1:def 2 frequency(x,s) / (len s); end; ::$CT theorem :: DIST_1:4 for S be finite set, s be FinSequence of S, x be set holds frequency(x,s)=(len s)* FDprobability (x,s); definition let S be finite set, s be FinSequence of S; func FDprobSEQ (s) -> FinSequence of REAL means :: DIST_1:def 3 dom it = Seg card S & for n be Nat st n in dom it holds it.n=FDprobability((canFS(S)).n,s); end; theorem :: DIST_1:5 for S be finite set, s be FinSequence of S, x be set holds FDprobability(x,s)=prob(event_pick(x,s)); definition let S be finite set, s,t be FinSequence of S; pred s,t -are_prob_equivalent means :: DIST_1:def 4 for x be set holds FDprobability (x,s)=FDprobability (x,t); reflexivity; symmetry; end; theorem :: DIST_1:6 for S be finite set, s,t,u be FinSequence of S st s,t -are_prob_equivalent & t,u -are_prob_equivalent holds s,u -are_prob_equivalent; definition let S be finite set, s be FinSequence of S; func Finseq-EQclass(s) -> Subset of S* equals :: DIST_1:def 5 {t where t is FinSequence of S: s,t -are_prob_equivalent}; end; registration let S be finite set, s be FinSequence of S; cluster Finseq-EQclass(s) -> non empty; end; theorem :: DIST_1:7 for S be finite set, s,t be FinSequence of S holds s,t -are_prob_equivalent iff t in Finseq-EQclass(s); theorem :: DIST_1:8 for S be finite set, s be FinSequence of S holds s in Finseq-EQclass(s); theorem :: DIST_1:9 for S be finite set, s,t be FinSequence of S holds s,t -are_prob_equivalent iff Finseq-EQclass(s) = Finseq-EQclass(t); definition let S be finite set; func distribution_family(S) -> Subset-Family of S* means :: DIST_1:def 6 for A being Subset of S* holds A in it iff ex s being FinSequence of S st A = Finseq-EQclass(s); end; registration let S be finite set; cluster distribution_family(S) -> non empty; end; theorem :: DIST_1:10 for S be finite set, s,t be FinSequence of S holds s,t -are_prob_equivalent iff FDprobSEQ(s) = FDprobSEQ(t); theorem :: DIST_1:11 for S be finite set, s,t be FinSequence of S st t in Finseq-EQclass(s) holds FDprobSEQ(s)=FDprobSEQ(t); definition let S be finite set; func GenProbSEQ (S) -> Function of distribution_family(S),REAL* means :: DIST_1:def 7 for x be Element of distribution_family(S) holds ex s be FinSequence of S st s in x & it.x=FDprobSEQ (s); end; theorem :: DIST_1:12 for S be finite set, s be FinSequence of S holds (GenProbSEQ (S)).(Finseq-EQclass(s)) = FDprobSEQ (s); registration let S be finite set; cluster GenProbSEQ S -> one-to-one; end; definition let S be finite set, p be ProbFinS FinSequence of REAL; assume ex s be FinSequence of S st FDprobSEQ (s)=p; func distribution (p,S) -> Element of distribution_family(S) means :: DIST_1:def 8 (GenProbSEQ S).it = p; end; definition let S be finite set, s be FinSequence of S; func freqSEQ (s) -> FinSequence of NAT means :: DIST_1:def 9 dom it = Seg card S & for n be Nat st n in dom it holds it.n=(len s) * (FDprobSEQ(s)).n; end; theorem :: DIST_1:13 for S be non empty finite set, s be non empty FinSequence of S, n be Nat st n in Seg (card S) ex x be Element of S st (freqSEQ(s)).n=frequency(x,s)& x=(canFS(S)).n; theorem :: DIST_1:14 for S be finite set, s be FinSequence of S holds freqSEQ (s) =(len s)* ((FDprobSEQ(s))); theorem :: DIST_1:15 for S be finite set, s be FinSequence of S holds Sum (freqSEQ (s))=(len s)* Sum(FDprobSEQ(s)); theorem :: DIST_1:16 for S be non empty finite set, s be non empty FinSequence of S, n be Nat st n in dom s ex m be Nat st (freqSEQ(s)).m = frequency(s.n,s) & s.n=(canFS(S)).m; theorem :: DIST_1:17 for S be Function,L be FinSequence of NAT st S is disjoint_valued & dom S = dom L & for i be Nat st i in dom S holds S.i is finite & L.i = card (S.i) holds Union S is finite & card Union S = Sum L; theorem :: DIST_1:18 for S be non empty finite set, s be non empty FinSequence of S holds Sum freqSEQ (s) = len s; theorem :: DIST_1:19 for S be non empty finite set, s be non empty FinSequence of S holds Sum FDprobSEQ (s) = 1; registration let S be non empty finite set, s be non empty FinSequence of S; cluster FDprobSEQ s -> ProbFinS; end; definition let S be non empty finite set; mode distProbFinS of S -> ProbFinS FinSequence of REAL means :: DIST_1:def 10 len it = card S & ex s be FinSequence of S st FDprobSEQ (s) = it; end; theorem :: DIST_1:20 for S be non empty finite set, p be distProbFinS of S holds distribution(p,S) is Element of distribution_family(S) & (GenProbSEQ S).distribution(p,S) = p; begin :: Uniform distribution definition let S be finite set, s be FinSequence of S; attr s is uniformly_distributed means :: DIST_1:def 11 for n be Nat st n in dom FDprobSEQ (s) holds (FDprobSEQ (s)).n=1/(card S); end; theorem :: DIST_1:21 for S be finite set, s be FinSequence of S st s is uniformly_distributed holds FDprobSEQ (s) is constant; theorem :: DIST_1:22 for S be finite set, s,t be FinSequence of S st s is uniformly_distributed & s,t -are_prob_equivalent holds t is uniformly_distributed; theorem :: DIST_1:23 for S be finite set, s,t be FinSequence of S st s is uniformly_distributed & t is uniformly_distributed holds s,t -are_prob_equivalent; registration let S be finite set; cluster canFS(S) -> uniformly_distributed for FinSequence of S; end; definition let S be finite set; func uniform_distribution(S) -> Element of distribution_family(S) means :: DIST_1:def 12 for s be FinSequence of S holds s in it iff s is uniformly_distributed; end; registration let S be non empty finite set; cluster constant for distProbFinS of S; end; definition let S be non empty finite set; func Uniform_FDprobSEQ(S) -> distProbFinS of S equals :: DIST_1:def 13 FDprobSEQ canFS S; end; registration let S be non empty finite set; cluster Uniform_FDprobSEQ(S) -> constant; end; theorem :: DIST_1:24 for S be non empty finite set holds uniform_distribution(S) = distribution(Uniform_FDprobSEQ(S),S);