
    c                       U d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlmZ ddlmZmZmZmZ ddlmZ ddlmZmZmZ ddlmZmZ dd	lmZmZmZmZmZmZm Z m!Z!m"Z"m#Z# ddl$Z$dd
l$m%Z% ddl&m'Z'm(Z(m)Z) ddl*m+Z+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2 erddl3m4Z4 dZ5 ej6        d          Z7 ede8          Z9 ede8          Z: G d de          Z;edee9         f         Z<ee9df         Z=e#e!eef         Z> G d d          Z?ede?f         Z@ G d d          ZA G d d          ZB G d de          ZCe"de:e:f         ZDe5fdVd$ZEdWd(ZFdXd.ZG G d/ d0e          ZH G d1 d2          ZI	 dYdZd=ZJejK         G d> d                      ZLd?d@iZMdAeNdB<   d[dIZO G dJ dKe(eI          ZPd\dNZQd]d^dQZRdYd_dTZSeTdUk    r eS             dS dS )`a  A similarities / code duplication command line tool and pylint checker.

The algorithm is based on comparing the hash value of n successive lines of a file.
First the files are read and any line that doesn't fulfill requirement are removed
(comments, docstrings...)

Those stripped lines are stored in the LineSet class which gives access to them.
Then each index of the stripped lines collection is associated with the hash of n
successive entries of the stripped lines starting at the current index (n is the
minimum common lines option).

The common hashes between both linesets are then looked for. If there are matches, then
the match indices in both linesets are stored and associated with the corresponding
couples (start line number/end line number) in both files.

This association is then post-processed to handle the case of successive matches. For
example if the minimum common lines setting is set to four, then the hashes are
computed with four lines. If one of match indices couple (12, 34) is the
successor of another one (11, 33) then it means that there are in fact five lines which
are common.

Once post-processed the values of association table are the result looked for, i.e.
start and end lines numbers of common lines in both files.
    )annotationsN)defaultdict)Callable	GeneratorIterableSequence)getopt)BufferedIOBaseBufferedReaderBytesIO)chaingroupby)
TYPE_CHECKINGAnyDictList
NamedTupleNewTypeNoReturnTextIOTupleUnion)nodes)BaseCheckerBaseRawFileCheckertable_lines_from_stats)SectionTable)MessageDefinitionTupleOptions)LinterStatsdecoding_stream)PyLinter   z.*\w+Index
LineNumberc                  $    e Zd ZU ded<   ded<   dS )LineSpecifsr&   line_numberstrtextN__name__
__module____qualname____annotations__     7lib/python3.11/site-packages/pylint/checkers/similar.pyr(   r(   Q   s%         IIIIIr2   r(   
LinesChunkSuccessiveLinesLimitsc                      e Zd ZdZdZdd
ZdS )CplSuccessiveLinesLimitszHolds a SuccessiveLinesLimits object for each checked file and counts the number
    of common lines between both stripped lines collections extracted from both files.
    
first_filesecond_fileeffective_cmn_lines_nbr9   r5   r:   r;   intreturnNonec                0    || _         || _        || _        d S Nr8   )selfr9   r:   r;   s       r3   __init__z!CplSuccessiveLinesLimits.__init__h   s!     %&&<###r2   N)r9   r5   r:   r5   r;   r<   r=   r>   )r-   r.   r/   __doc__	__slots__rB   r1   r2   r3   r7   r7   a   s:          HI= = = = = =r2   r7   LineSetStartCouplec                  >    e Zd ZdZdZddZddZddZddZddZ	dS )r4   zlThe LinesChunk object computes and stores the hash of some consecutive stripped
    lines of a lineset.
    _fileid_index_hashfileidr*   num_liner<   linesIterable[str]r=   r>   c                |    || _         	 t          |          | _        	 t          d |D                       | _        d S )Nc              3  4   K   | ]}t          |          V  d S r@   )hash).0lins     r3   	<genexpr>z&LinesChunk.__init__.<locals>.<genexpr>   s(      99Cd3ii999999r2   )rH   r%   rI   sumrJ   )rA   rK   rL   rM   s       r3   rB   zLinesChunk.__init__   sD    "Q"8__	 99599999
11r2   or   boolc                Z    t          |t                    st          S | j        |j        k    S r@   )
isinstancer4   NotImplementedrJ   )rA   rV   s     r3   __eq__zLinesChunk.__eq__   s)    !Z(( 	"!!zQW$$r2   c                    | j         S r@   )rJ   rA   s    r3   __hash__zLinesChunk.__hash__   s
    zr2   c                8    d| j          d| j         d| j         dS )Nz<LinesChunk object for file z (z, z)>rG   r]   s    r3   __repr__zLinesChunk.__repr__   s+    X4<XX4;XX$*XXX	
r2   c                6    d| j          d| j         d| j         S )NzLinesChunk object for file z, starting at line z
 
Hash is rG   r]   s    r3   __str__zLinesChunk.__str__   s7    $$, $ $4; $ $z$ $	
r2   N)rK   r*   rL   r<   rM   rN   r=   r>   )rV   r   r=   rW   r=   r<   r=   r*   )
r-   r.   r/   rC   rD   rB   r[   r^   r`   rb   r1   r2   r3   r4   r4   x   s          /I
2 
2 
2 
2% % % %
   
 
 
 


 
 
 
 
 
r2   c                  x    e Zd ZdZdZddZedd	            Zedd
            Zej	        dd            ZddZ
dS )r5   zA class to handle the numbering of begin and end of successive lines.

    :note: Only the end line number can be updated.
    _start_endstartr&   endr=   r>   c                "    || _         || _        d S r@   rf   )rA   ri   rj   s      r3   rB   zSuccessiveLinesLimits.__init__   s    "' #			r2   c                    | j         S r@   )rg   r]   s    r3   ri   zSuccessiveLinesLimits.start   s
    {r2   c                    | j         S r@   rh   r]   s    r3   rj   zSuccessiveLinesLimits.end   s
    yr2   valuec                    || _         d S r@   rn   rA   ro   s     r3   rj   zSuccessiveLinesLimits.end   s    			r2   r*   c                (    d| j          d| j         dS )Nz<SuccessiveLinesLimits <;>>rf   r]   s    r3   r`   zSuccessiveLinesLimits.__repr__   s    E$+EE	EEEEr2   N)ri   r&   rj   r&   r=   r>   )r=   r&   )ro   r&   r=   r>   rd   )r-   r.   r/   rC   rD   rB   propertyri   rj   setterr`   r1   r2   r3   r5   r5      s         
 #I$ $ $ $    X    X 	Z   ZF F F F F Fr2   c                  H    e Zd ZU dZded<   ded<   ddZddZddZddZdS )rE   zEIndices in both linesets that mark the beginning of successive lines.r%   fst_lineset_indexsnd_lineset_indexr=   r*   c                (    d| j          d| j         dS )Nz<LineSetStartCouple <rs   rt   )rx   ry   r]   s    r3   r`   zLineSetStartCouple.__repr__   s#    WD$:WWT=SWWW	
r2   otherr   rW   c                z    t          |t                    st          S | j        |j        k    o| j        |j        k    S r@   )rY   rE   rZ   rx   ry   rA   r{   s     r3   r[   zLineSetStartCouple.__eq__   sB    %!344 	"!!"e&== B&%*AA	
r2   r<   c                T    t          | j                  t          | j                  z   S r@   )rQ   rx   ry   r]   s    r3   r^   zLineSetStartCouple.__hash__   s#    D*++d43I.J.JJJr2   ro   c                v    t          t          | j        |z             t          | j        |z                       S r@   )rE   r%   rx   ry   rq   s     r3   	incrementzLineSetStartCouple.increment   s:    !$(5011$(5011
 
 	
r2   Nrd   r{   r   r=   rW   rc   )ro   r%   r=   rE   )	r-   r.   r/   rC   r0   r`   r[   r^   r   r1   r2   r3   rE   rE      s         OO
 
 
 


 
 
 
K K K K
 
 
 
 
 
r2   LineSetlinesetmin_common_linesr<   r=   $tuple[HashToIndex_T, IndexToLines_T]c                R   t          t                    }i }t          d | j        D                       fdt	          |          D             }t          t          |           D ]^}}t          | j        |         j                  }	 | j        ||z            j        }n2# t          $ r% t          | j        d         j        dz             }Y nw xY wt          |          }	t          ||          ||	<   t          | j        |	g|R  }
||
                             |	           ||fS )a`  Return two dicts.

    The first associates the hash of successive stripped lines of a lineset
    to the indices of the starting lines.
    The second dict, associates the index of the starting line in the lineset's stripped lines to the
    couple [start, end] lines number in the corresponding file.

    :param lineset: lineset object (i.e the lines in a file)
    :param min_common_lines: number of successive lines that are used to compute the hash
    :return: a dict linking hashes to corresponding start index and a dict that links this
             index to the start and end lines in the file
    c              3  $   K   | ]}|j         V  d S r@   )r+   )rR   xs     r3   rT   zhash_lineset.<locals>.<genexpr>   s$      99Q!&999999r2   c                >    g | ]}t          |d                    S r@   )iter)rR   irM   s     r3   
<listcomp>z hash_lineset.<locals>.<listcomp>   s'    FFFT%)__FFFr2      )ri   rj   )r   listtuplestripped_linesrange	enumeratezipr&   r)   
IndexErrorr%   r5   r4   nameappend)r   r   
hash2indexindex2linesshifted_linesr   
succ_linesstart_linenumberend_linenumberindexl_crM   s              @r3   hash_linesetr      s^    T""JK 99'"899999EFFFFe4D.E.EFFFM#C$788 & &J%g&<Q&?&KLL	T$3A8H4HIUNN 	T 	T 	T'(>r(B(NQR(RSSNNN	T a2"
 
 
E u:z:::3u%%%%{""s   B!!,CCall_couplesCplIndexToCplLines_Tr>   c                   t          |                                           D ]}g }|                    t          d                    }|| v r| |         j        j        | |         j        _        | |         j        j        | |         j        _        | |         xj        dz  c_        |                    |           |                    t          d                    }|| v |D ](}	 | 	                    |           # t          $ r Y %w xY wdS )a  Removes all successive entries in the dictionary in argument.

    :param all_couples: collection that has to be cleaned up from successive entries.
                        The keys are couples of indices that mark the beginning of common entries
                        in both linesets. The values have two parts. The first one is the couple
                        of starting and ending line numbers of common successive lines in the first file.
                        The second part is the same for the second file.

    For example consider the following dict:

    >>> all_couples
    {(11, 34): ([5, 9], [27, 31]),
     (23, 79): ([15, 19], [45, 49]),
     (12, 35): ([6, 10], [28, 32])}

    There are two successive keys (11, 34) and (12, 35).
    It means there are two consecutive similar chunks of lines in both files.
    Thus remove last entry and update the last line numbers in the first entry

    >>> remove_successive(all_couples)
    >>> all_couples
    {(11, 34): ([5, 10], [27, 32]),
     (23, 79): ([15, 19], [45, 49])}
    r   N)r   keysr   r%   r9   rj   r:   r;   r   popKeyError)r   couple	to_removetesttargets        r3   remove_successiver     s5   4 ((**++  	a))k! 	,1<T1B1M1QK*.2=d2C2O2SK+/66!;66T""">>%((++D k! 	,   	 	F''''   	 s   &C<<
D	D	ls_1	stindex_1ls_2	stindex_2common_lines_nbc                    d | j         |||z            D             }d |j         |||z            D             }t          d t          ||          D                       S )a|  Return the effective number of common lines between lineset1
    and lineset2 filtered from non code lines.

    That is to say the number of common successive stripped
    lines except those that do not contain code (for example
    a line with only an ending parenthesis)

    :param ls_1: first lineset
    :param stindex_1: first lineset starting index
    :param ls_2: second lineset
    :param stindex_2: second lineset starting index
    :param common_lines_nb: number of common successive stripped lines before being filtered from non code lines
    :return: the number of common successive stripped lines that contain code
    c                Z    g | ](}t                               |j                  !|j        )S r1   REGEX_FOR_LINES_WITH_CONTENTmatchr+   rR   lspecifs     r3   r   z(filter_noncode_lines.<locals>.<listcomp>D  @       '--gl;;  r2   c                Z    g | ](}t                               |j                  !|j        )S r1   r   r   s     r3   r   z(filter_noncode_lines.<locals>.<listcomp>I  r   r2   c              3  (   K   | ]\  }}||k    V  d S r@   r1   )rR   sline_1sline_2s      r3   rT   z'filter_noncode_lines.<locals>.<genexpr>N  s,      XX&6gww'!XXXXXXr2   )r   rU   r   )r   r   r   r   r   stripped_l1stripped_l2s          r3   filter_noncode_linesr   /  s    * *9y?7R+RS  K
 *9y?7R+RS  K
 XX#k;:W:WXXXXXXr2   c                  V    e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   dS )Commonalityr<   cmn_lines_nbr   fst_lsetr&   fst_file_startfst_file_endsnd_lsetsnd_file_startsnd_file_endNr,   r1   r2   r3   r   r   Q  sf         r2   r   c                  r    e Zd ZdZeddddfd&dZ	 d'd(dZd)dZd*dZd+dZ	d,dZ
d-dZd.d Zd/d"Zd0d%ZdS )1Similarz-Finds copy-pasted lines of code in a project.F	min_linesr<   ignore_commentsrW   ignore_docstringsignore_importsignore_signaturesr=   r>   c                
   t          | t                    r| j        j        | _        nt          j                    | _        || j        _        || j        _        || j        _	        || j        _
        || j        _        g | _        d S r@   )rY   r   linterconfig	namespaceargparse	Namespacemin_similarity_linesr   r   r   r   linesets)rA   r   r   r   r   r   s         r3   rB   zSimilar.__init__^  sq     dK(( 	2![/DNN%/11DN.7+)8&+<((6%+<(')r2   Nstreamidr*   streamSTREAM_TYPESencoding
str | Nonec                   t          |t                    r|t          t          ||          j        }n|j        }	  |            }n# t
          $ r g }Y nw xY w| j                            t          ||| j	        j
        | j	        j        | j	        j        | j	        j        t          | d          r| j        j        nd                     dS )z)Append a file to search for similarities.Nr   line_enabled_callback)rY   r
   
ValueErrorr"   	readlinesUnicodeDecodeErrorr   r   r   r   r   r   r   r   hasattrr   _is_one_message_enabled)rA   r   r   r   r   rM   s         r3   append_streamzSimilar.append_streams  s     fn-- 	) !  '99CII(I	IKKEE! 	 	 	EEE	 	.0-04**'dk&I&I
 
 
	
 	
 	
 	
 	
s   
A AAc                x    | j         j        dk    rdS |                     |                                            dS )z=Start looking for similarities and display results on stdout.r   N)r   r   _display_sims_compute_simsr]   s    r3   runzSimilar.run  s?    >.!3 	F4--//00000r2   )list[tuple[int, set[LinesChunkLimits_T]]]c                   t          t                    }|                                 D ]m}|j        }|j        }|j        }|j        }|j        }|j        }|j	        }	||         }
|
D ]}|||f|v s|||	f|v r n|

                    |||f|||	fh           ng }|                                D ]!\  }}|D ]}|
                    ||f           "|                                 |                                 |S )z'Compute similarities in appended files.)r   r   
_iter_simsr   r   r   r   r   r   r   r   itemssortreverse)rA   no_duplicatescommonalitynumlineset1start_line_1
end_line_1lineset2start_line_2
end_line_2	duplicatecouplessims	ensemblescplss                  r3   r   zSimilar._compute_sims  s^   BMdBSBS??,, 	 	K*C"+H&5L$1J"+H&5L$1J%c*I$  lJ77B  G 	G 
 E   !<<!<<   ;=+1133 	) 	)NC! ) )S$K(((()		r2   similaritiesc                N    |                      |          }t          |           dS )z(Display computed similarities on stdout.N)_get_similarity_reportprint)rA   r   reports      r3   r   zSimilar._display_sims  s&     ,,\::fr2   c           
        d}d}|D ]\  }}|d| dt          |           dz  }t          |          }dx}x}}	|D ]\  }}}	|d|j         d| d	|	 d
z  }|rC|j        ||	         D ]3}
||
                                rd|
                                 dndz  }4||t          |          dz
  z  z  }t          d | j        D                       }|d| d| d|dz  |z  ddz  }|S )z"Create a report from similarities. r   
z similar lines in z files
N==:[:z]
z   r   c              3  4   K   | ]}t          |          V  d S r@   lenrR   r   s     r3   rT   z1Similar._get_similarity_report.<locals>.<genexpr>  s(      $O$OgS\\$O$O$O$O$O$Or2   zTOTAL lines=z duplicates=z	 percent=      Y@z.2f)r  sortedr   _real_linesrstriprU   r   )rA   r   r  duplicated_line_numbernumberr   	couples_lline_set
start_lineend_linelinetotal_line_numbers               r3   r   zSimilar._get_similarity_report  s    &'+ 		D 		DOFGK6KKS\\KKKKFwI/33H3zH2; K K.*hJx}JJ
JJXJJJJ Q$0H1DE Q QDP5DKKMM5555DPFF"fI0B&CC""!$$O$O$O$O$O!O!OR, R R0R R-58IIQR R R	

 r2   r   r   r   "Generator[Commonality, None, None]c           
   #    K   t          || j        j                  \  }t          || j        j                  \  }}t                                                    }t          |                                          }t          ||z  fd          }i }	t          |t          j        d                    D ]}
t          j	        |
         ||
                   D ]o}|d         }|d         }t          t          j        ||                   t          j        ||                   | j        j                  |	t          ||          <   pt          |	           |	                                D ]\  }}|j        }|j        }|j        }t%          |||j        j        |j        j        ||j        j        |j        j                  }t/          |||||          }|| j        j        k    r|V  dS )	at  Find similarities in the two given linesets.

        This the core of the algorithm. The idea is to compute the hashes of a
        minimal number of successive lines of each lineset and then compare the
        hashes. Every match of such comparison is stored in a dict that links the
        couple of starting indices in both linesets to the couple of corresponding
        starting and ending lines in both files.

        Last regroups all successive couples in a bigger one. It allows to take into
        account common chunk of lines that have more than the minimal number of
        successive lines required.
        c                     |          d         S Nr   r1   )mhash_to_index_1s    r3   <lambda>z&Similar._find_common.<locals>.<lambda>  s    ?1+=a+@ r2   keyrI   r   r   )r;   )r   r   r   r   r   r   r   N)r   r   r   	frozensetr   r  operator
attrgetter	itertoolsproductr7   copyrE   r   r   rx   ry   r;   r   r9   ri   rj   r:   r   )rA   r   r   index_to_lines_1hash_to_index_2index_to_lines_2hash_1hash_2common_hashesr   c_hashindices_in_linesetsindex_1index_2cml_stripped_lcmn_lstart_index_1start_index_2nb_common_linescom
eff_cmn_nbr  s                        @r3   _find_commonzSimilar._find_common  sD     & -9dn9-
 -
)) -9dn9-
 -
)) )2/2F2F2H2H(I(I(1/2F2F2H2H(I(I.4VO!@!@!@!@/
 /
 /
 -/]0CH0M0MNNN 	 	F'0'8')@( (  # .a0-a0 -I.w788I.w788+/>+N   &w88  	+&&&%0%6%6%8%8 	 	!NE*<M*<M#:O,!$/5"-1!$06".2  C .-=/ J DN?? 			)	 	r2   c              #     K   t          | j        dd                   D ]6\  }}| j        |dz   d         D ]}|                     ||          E d{V  7dS )zXIterate on similarities among all files, by making a Cartesian
        product.
        Nr   r   )r   r   r9  )rA   idxr   r   s       r3   r   zSimilar._iter_sims)  s       &dmCRC&899 	@ 	@LC M#'))4 @ @,,Wh??????????@	@ 	@r2   list[LineSet]c                    | j         S )zReturns the data we can use for a map/reduce process.

        In this case we are returning this instance's Linesets, that is all file
        information that will later be used for vectorisation.
        r   r]   s    r3   get_map_datazSimilar.get_map_data1  s     }r2   linesets_collectionlist[list[LineSet]]c                (    d |D             | _         dS )yReduces and recombines data into a format that we can report on.

        The partner function of get_map_data()
        c                    g | ]	}|D ]}|
S r1   r1   )rR   r   r  s      r3   r   z2Similar.combine_mapreduce_data.<locals>.<listcomp>>  s%    UUU'WUUTUUUUr2   Nr>  )rA   r@  s     r3   combine_mapreduce_datazSimilar.combine_mapreduce_data9  s    
 VU-@UUUr2   )r   r<   r   rW   r   rW   r   rW   r   rW   r=   r>   r@   )r   r*   r   r   r   r   r=   r>   r=   r>   )r=   r   )r   r   r=   r>   )r   r   r=   r*   )r   r   r   r   r=   r  )r=   r  r=   r<  )r@  rA  r=   r>   )r-   r.   r/   rC   DEFAULT_MIN_SIMILARITY_LINErB   r   r   r   r   r   r9  r   r?  rE  r1   r2   r3   r   r   [  s       77 5 %"'$"'* * * * *, KO
 
 
 
 
<1 1 1 1% % % %N      2I I I IV@ @ @ @   V V V V V Vr2   r   rM   rN   r   rW   r   r   r   r   !Callable[[str, int], bool] | Nonelist[LineSpecifs]c           
        |s|r't          j        d                    |                     }|r/d |j        D             }d t	          |d           D             }d}	|r2dfd g |          }
t          t          d |
D                        }g }d}t          | d          D ]O\  }}| |d|          s|                                }|r|s}|	                    d          s|	                    d          r|dd         }|dd         }n>|	                    d          s|	                    d          r|dd         }|dd         }|r|
                    |          rd}d}|r|                    ||	          }	|	rd}|r.|                    dd          d                                         }|r||v rd}|r4|                    t          |t          |dz
                                 Q|S )a  Return tuples of line/line number/line type with leading/trailing white-space and
    any ignored code features removed.

    :param lines: a collection of lines
    :param ignore_comments: if true, any comment in the lines collection is removed from the result
    :param ignore_docstrings: if true, any line that is a docstring is removed from the result
    :param ignore_imports: if true, any line that is an import is removed from the result
    :param ignore_signatures: if true, any line that is part of a function signature is removed from the result
    :param line_enabled_callback: If called with "R0801" and a line number, a return value of False will disregard the line
    :return: the collection of line/line number/line type tuples
    r  c              3  p   K   | ]1}|j         t          |t          j        t          j        f          fV  2d S r@   )linenorY   r   Import
ImportFrom)rR   nodes     r3   rT   z!stripped_lines.<locals>.<genexpr>W  sQ       $
 $
 [*TEL%:J+KLLM$
 $
 $
 $
 $
 $
r2   c                H    i | ]\  }}|t          d  |D                        S )c              3      K   | ]	\  }}|V  
d S r@   r1   )rR   _	is_imports      r3   rT   z,stripped_lines.<locals>.<dictcomp>.<genexpr>\  s&      KKla	KKKKKKr2   )all)rR   rM  node_is_import_groups      r3   
<dictcomp>z"stripped_lines.<locals>.<dictcomp>[  sG     
 
 
,, CKK6JKKKKK
 
 
r2   c                    | d         S r  r1   )r   s    r3   r  z stripped_lines.<locals>.<lambda>^  s
    ! r2   r   F	functionslist[nodes.NodeNG]treenodes.NodeNGr=   c                   |j         D ]z}t          |t          j        t          j        f          r|                     |           t          |t          j        t          j        t          j        f          r | |           {| S )zeRecursively get all functions including nested in the classes from the
            tree.
            )bodyrY   r   FunctionDefAsyncFunctionDefr   ClassDef)rY  r[  rP  _get_functionss      r3   rb  z&stripped_lines.<locals>._get_functionsd  s     	 4 4dU%68N$OPP +$$T***^U%68NO  4 #N9d333r2   c              3     K   | ]:}t          |j        |j        r|j        d          j        n	|j        dz             V  ;dS )r   r   N)r   rM  r^  tolineno)rR   funcs     r3   rT   z!stripped_lines.<locals>.<genexpr>z  sf        
 	 /3yO	!++dma>O      r2   Nr   )ri   R0801z"""z'''   zr"""zr'''r$   #r   )r+   r)   )rY  rZ  r[  r\  r=   rZ  )astroidparsejoinr^  r   setr   r   strip
startswithendswithgetsplitr   r(   r&   )rM   r   r   r   r   r   r[  node_is_import_by_linenoline_begins_importcurrent_line_is_importrY  signature_linesstrippedlines	docstringrM  r  rb  s                   @r3   r   r   A  s   &  -* -}RWWU^^,, '$
 $
	$
 $
 $
 
 
07(nn1 1 1
 
 
 "'  
	 	 	 	 	 	& #N2t,,	 
 !*  

 

 MI!%q111    	5J5JV6
 6
 	 zz|| 	 $??5)) $T__U-C-C $ $RaRI8DD__V,, $0G0G $ $QqS	I8D ==++ % $I 	%7%;%;.& &" &  	1::c1%%a(..00D 	?!: 	D 	  :fqj3I3IJJJ   r2   c                      e Zd ZdZ	 	 	 	 	 d d!dZd"dZd#dZd$dZd%dZd#dZ	d&dZ
ed'd            Zed(d            ZdS ))r   zHolds and indexes all the lines of a single source file.

    Allows for correspondence between real lines of the source file and stripped ones, which
    are the real ones from which undesired patterns have been removed.
    FNr   r*   rM   	list[str]r   rW   r   r   r   r   rI  r=   r>   c                V    || _         || _        t          ||||||          | _        d S )Nr   )r   r  r   _stripped_lines)rA   r   rM   r   r   r   r   r   s           r3   rB   zLineSet.__init__  sB     	 -"7 
  
  
r2   c                    d| j          dS )Nz<Lineset for >r   r]   s    r3   rb   zLineSet.__str__  s    +ty++++r2   r<   c                *    t          | j                  S r@   )r  r  r]   s    r3   __len__zLineSet.__len__  s    4#$$$r2   r   r(   c                    | j         |         S r@   r{  )rA   r   s     r3   __getitem__zLineSet.__getitem__  s    #E**r2   r{   c                "    | j         |j         k     S r@   r~  r}   s     r3   __lt__zLineSet.__lt__  s    y5:%%r2   c                     t          |           S r@   )idr]   s    r3   r^   zLineSet.__hash__  s    $xxr2   r   c                P    t          |t                    sdS | j        |j        k    S )NF)rY   r   __dict__r}   s     r3   r[   zLineSet.__eq__  s(    %)) 	5}..r2   rJ  c                    | j         S r@   r  r]   s    r3   r   zLineSet.stripped_lines  s    ##r2   c                    | j         S r@   )r  r]   s    r3   
real_lineszLineSet.real_lines  s    r2   )FFFFN)r   r*   rM   ry  r   rW   r   rW   r   rW   r   rW   r   rI  r=   r>   rd   rc   )r   r<   r=   r(   )r{   r   r=   rW   r   )r=   rJ  )r=   ry  )r-   r.   r/   rC   rB   rb   r  r  r  r^   r[   ru   r   r  r1   r2   r3   r   r     s          !&"'$"'CG
 
 
 
 
*, , , ,% % % %+ + + +& & & &   / / / /
 $ $ $ X$       X     r2   rf  )zSimilar lines in %s files
%szduplicate-codezIndicates that a set of similar lines has been detected among multiple file. This usually means that the code should be refactored to avoid this duplication.z!dict[str, MessageDefinitionTuple]MSGSsectr   statsr!   	old_statsLinterStats | Nonec                    g d}|t          ||d          z  }|                     t          |ddd                     dS )z0Make a layout with some stats about duplication.)r  nowprevious
differenceduplicated_linesr$   r   )childrencolsrheaderscheadersN)r   r   r   )r  r  r  rM   s       r3   report_similaritiesr    sP     211E	#E96HIIIEKKu1q1EEEFFFFFr2   c            
          e Zd ZU dZdZeZdeddddfdd	d
dddfdd	d
dddfdd	d
dddfdd	d
dddffZde	d<   dde
ffZd'dZd(dZd)dZd(d Zd*d"Zd+d%Zd&S ),SimilarCheckerzChecks for similarities and duplicated code.

    This computation may be memory / CPU intensive, so you
    should disable it if you experience some problems.
    r   zmin-similarity-linesr<   z<int>z%Minimum lines number of a similarity.)defaulttypemetavarhelpignore-commentsTynz<y or n>z4Comments are removed from the similarity computationignore-docstringsz6Docstrings are removed from the similarity computationignore-importsz3Imports are removed from the similarity computationignore-signaturesz6Signatures are removed from the similarity computationr    optionsRP0801Duplicationr   r#   r=   r>   c                   t          j        | |           t                              | | j        j        j        | j        j        j        | j        j        j        | j        j        j        | j        j        j	                   d S )N)r   r   r   r   r   )
r   rB   r   r   r   r   r   r   r   r   )rA   r   s     r3   rB   zSimilarChecker.__init__9  sv    #D&111k(= K.>"k0B;-<"k0B 	 	
 	
 	
 	
 	
r2   c                P    g | _         | j        j                                         dS )z=Init the checkers: reset linesets and statistics information.N)r   r   r  reset_duplicated_linesr]   s    r3   openzSimilarChecker.openD  s&    0022222r2   rP  nodes.Modulec                    | j         j        t          j        dt                     |                                5 }|                     | j         j        ||j                   ddd           dS # 1 swxY w Y   dS )zProcess a module.

        the module's content is accessible via the stream object

        stream must implement the readlines method
        NzIn pylint 3.0 the current_name attribute of the linter object should be a string. If unknown it should be initialized as an empty string.)r   current_namewarningswarnDeprecationWarningr   r   file_encoding)rA   rP  r   s      r3   process_modulezSimilarChecker.process_moduleI  s     ;# 	MN #   [[]] 	Uft{7ASTTT	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	U 	Us   'A//A36A3c                   t          d | j        D                       }d}| j        j        }|                                 D ]\  }}g }dx}x}}	|D ]*\  }}}	|                    d|j         d| d|	 d           +|                                 |r9|j        ||	         D ])}
|                    |
	                                           *| 
                    dt          |          d	                    |          f
           ||t          |          dz
  z  z  }|xj        t          |          z  c_        |xj        t!          |o|dz  |z            z  c_        dS )zBCompute and display similarities on closing (i.e. end of parsing).c              3  4   K   | ]}t          |          V  d S r@   r
  r  s     r3   rT   z'SimilarChecker.close.<locals>.<genexpr>]  s(      >>WCLL>>>>>>r2   r   Nr  r  r  ]rf  r  )argsr   r  )rU   r   r   r  r   r   r   r   r  r  add_messager  rk  nb_duplicated_linesr<   percent_duplicated_linesfloat)rA   total
duplicatedr  r   r   msgr   r  r  r  s              r3   closezSimilarChecker.close[  s   >>>>>>>
! ..00 	3 	3LCC.22G2j818 J J-X

HHH
HHXHHHIIIIHHJJJ .#.z(/BC . .DJJt{{}}----WCLL$))C..+IJJJ#W!122JJ!!S__4!!&&%0T*u:Lu:T*U*UU&&&&r2   r<  c                6    t                               |           S )zPassthru override.)r   r?  r]   s    r3   r?  zSimilarChecker.get_map_datap  s    ##D)))r2   datarA  c                >    t                               | |           dS )rC  )r@  N)r   rE  )rA   r   r  s      r3   reduce_map_datazSimilarChecker.reduce_map_datat  s#    
 	&&t&FFFFFr2   Nr   r#   r=   r>   rF  )rP  r  r=   r>   rG  )r   r#   r  rA  r=   r>   )r-   r.   r/   rC   r   r  msgsrH  r  r0   r  reportsrB   r  r  r  r?  r  r1   r2   r3   r  r    s          DD
 #6"?	 	
 %N	 	
  %P	 	
 %M	 	
  %P	 	
K.G . . . .` -)<=?G	
 	
 	
 	
3 3 3 3
U U U U$V V V V** * * *G G G G G Gr2   r  r   r#   c                J    |                      t          |                      d S r@   )register_checkerr  )r   s    r3   registerr  |  s$    
N62233333r2   statusr   c                    t          d           t                       t          d           t          j        |            dS )z'Display command line usage information.z*finds copy pasted blocks in a set of fileszUsage: symilar [-d|--duplicates min_duplicated_lines] [-i|--ignore-comments] [--ignore-docstrings] [--ignore-imports] [--ignore-signatures] file1...N)r  sysexit)r  s    r3   usager    sJ    	
6777	GGG		`   HVr2   argvSequence[str] | Nonec                R   | t           j        dd         } d}g d}t          }d}d}d}d}t          t	          |           ||          \  }}	|D ]G\  }
}|
dv rt          |          }|
dv rt                       ,|
dv rd	}3|
d
v rd	}:|
dv rd	}A|
dv rd	}H|	st          d           t          |||||          }|	D ]A}t          |d          5 }|	                    ||           ddd           n# 1 swxY w Y   B|
                                 t          j        d           dS )z%Standalone command line access point.Nr   hdi)r  zduplicates=r  r  r  r  F>   -d--duplicates>   -h--help>   -i--ignore-commentsT>   --ignore-docstrings>   --ignore-imports>   --ignore-signatureszutf-8)r   r   )r  r  rH  r	   r   r<   r  r   r  r   r   r  )r  s_optsl_optsr   r   r   r   r   optsr  optvalsimfilenamer   s                  r3   Runr    s    x|F  F ,IONT

FF33JD$ % %S(( 	%CII$$ 		%GGGG// 	%"OO++ 	% $(( 	%!NN++ 	% $ a
?$5~GX C  0 0(W--- 	0h///	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0GGIIIHQKKKKKs   C33C7	:C7	__main__)r   r   r   r<   r=   r   )r   r   r=   r>   )r   r   r   r%   r   r   r   r%   r   r<   r=   r<   r@   )rM   rN   r   rW   r   rW   r   rW   r   rW   r   rI  r=   rJ  )r  r   r  r!   r  r  r=   r>   r  )r   )r  r<   r=   r   )r  r  r=   r   )UrC   
__future__r   r   r'  	functoolsr%  r#  rer  r  collectionsr   collections.abcr   r   r   r   r	   ior
   r   r   r   r   typingr   r   r   r   r   r   r   r   r   r   ri  r   pylint.checkersr   r   r   pylint.reporters.ureports.nodesr   r   pylint.typingr   r    pylint.utilsr!   r"   pylint.lintr#   rH  compiler   r<   r%   r&   r(   HashToIndex_TIndexToLines_Tr   r7   r   r4   r5   rE   LinesChunkLimits_Tr   r   r   r   r   r   total_orderingr   r  r0   r  r  r  r  r  r-   r1   r2   r3   <module>r     sx  
  2 # " " " " "            				 



  # # # # # # C C C C C C C C C C C C       6 6 6 6 6 6 6 6 6 6 $ $ $ $ $ $ $ $                               S S S S S S S S S S : : : : : : : : 9 9 9 9 9 9 9 9 5 5 5 5 5 5 5 5 %$$$$$$ )rz(33  	 W\3''
    *    \4;./ e445 V^W45= = = = = = = =( 02JJK $
 $
 $
 $
 $
 $
 $
 $
NF F F F F F F F8
 
 
 
 
 
 
 
: 9j*<=  /J&# &# &# &# &#R( ( ( (VY Y Y YD    *   cV cV cV cV cV cV cV cVX @De e e e eP 6  6  6  6  6  6  6  6 t  +    G G G GG G G G G' G G GD4 4 4 4    * * * * *Z z 
CEEEEE
 
r2   