o
    h9?                     @  sN  d Z ddlmZ ddlZddlZddlZddlZddlZddl	Z	ddl
Z
ddlZddlZddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6 e	7e8Z9ee:e:f Z;dSddZ<G dd de=Z>dTd!d"Z?G d#d$ d$e=Z@dUd'd(ZAdVd)d*ZBdWd-d.ZCG d/d0 d0ZDG d1d2 d2eZEdXd4d5ZFeFdYd9d:ZGed;d<G d=d7 d7ZHG d>d? d?eZI	dZd[dFdGZJ	;d\d]dJdKZKd^dMdNZLG dOdP dPeZMG dQdR dRZNdS )_zO
The main purpose of this module is to expose LinkCollector.collect_sources().
    )annotationsN)IterableMutableMappingSequence)	dataclass)
HTMLParser)Values)Callable
NamedTupleProtocol)requests)Response)
RetryErrorSSLError)NetworkConnectionError)Link)SearchScope)
PipSession)raise_for_status)is_archive_fileredact_auth_from_url)vcs   )CandidatesFromPage
LinkSourcebuild_sourceurlstrreturn
str | Nonec                 C  s6   t jD ]}|  |r| t| dv r|  S qdS )zgLook for VCS schemes in the URL.

    Returns the matched VCS scheme, or None if there's no match.
    z+:N)r   schemeslower
startswithlen)r   scheme r&   P/var/www/html/venv/lib/python3.10/site-packages/pip/_internal/index/collector.py_match_vcs_scheme.   s
   
r(   c                      s   e Zd Zd fddZ  ZS )	_NotAPIContentcontent_typer   request_descr   Nonec                   s   t  || || _|| _d S N)super__init__r*   r+   )selfr*   r+   	__class__r&   r'   r/   :   s   
z_NotAPIContent.__init__)r*   r   r+   r   r   r,   )__name__
__module____qualname__r/   __classcell__r&   r&   r1   r'   r)   9   s    r)   responser   r,   c                 C  s2   | j dd}| }|drdS t|| jj)z
    Check the Content-Type header to ensure the response contains a Simple
    API Response.

    Raises `_NotAPIContent` if the content type is not a valid content-type.
    Content-TypeUnknown)z	text/htmlz#application/vnd.pypi.simple.v1+html#application/vnd.pypi.simple.v1+jsonN)headersgetr"   r#   r)   requestmethod)r7   r*   content_type_lr&   r&   r'   _ensure_api_header@   s   r@   c                   @  s   e Zd ZdS )_NotHTTPN)r3   r4   r5   r&   r&   r&   r'   rA   V   s    rA   sessionr   c                 C  sF   t j| \}}}}}|dvrt |j| dd}t| t| dS )z
    Send a HEAD request to the URL, and ensure the response contains a simple
    API Response.

    Raises `_NotHTTP` if the URL is not available for a HEAD request, or
    `_NotAPIContent` if the content type is not a valid content type.
    >   httphttpsT)allow_redirectsN)urllibparseurlsplitrA   headr   r@   )r   rB   r%   netlocpathqueryfragmentrespr&   r&   r'   _ensure_api_responseZ   s   rO   c                 C  sx   t t| jrt| |d tdt|  |j| dg dddd}t	| t
| tdt| |jd	d
 |S )aY  Access an Simple API response with GET, and return the response.

    This consists of three parts:

    1. If the URL looks suspiciously like an archive, send a HEAD first to
       check the Content-Type is HTML or Simple API, to avoid downloading a
       large file. Raise `_NotHTTP` if the content type cannot be determined, or
       `_NotAPIContent` if it is not HTML or a Simple API.
    2. Actually perform the request. Raise HTTP exceptions on network failures.
    3. Check the Content-Type header to make sure we got a Simple API response,
       and raise `_NotAPIContent` otherwise.
    rB   zGetting page %sz, )r:   z*application/vnd.pypi.simple.v1+html; q=0.1ztext/html; q=0.01z	max-age=0)AcceptzCache-Control)r;   zFetched page %s as %sr8   r9   )r   r   filenamerO   loggerdebugr   r<   joinr   r@   r;   )r   rB   rN   r&   r&   r'   _get_simple_responsel   s&   rV   r;   ResponseHeadersc                 C  s<   | rd| v rt j }| d |d< |d}|rt|S dS )z=Determine if we have any encoding information in our headers.r8   zcontent-typecharsetN)emailmessageMessage	get_paramr   )r;   mrX   r&   r&   r'   _get_encoding_from_headers   s   

r^   c                   @  s*   e Zd ZdddZdd
dZdddZdS )CacheablePageContentpageIndexContentr   r,   c                 C  s   |j sJ || _d S r-   )cache_link_parsingr`   r0   r`   r&   r&   r'   r/      s   

zCacheablePageContent.__init__otherobjectboolc                 C  s   t |t| o| jj|jjkS r-   )
isinstancetyper`   r   )r0   rd   r&   r&   r'   __eq__   s   zCacheablePageContent.__eq__intc                 C  s   t | jjS r-   )hashr`   r   r0   r&   r&   r'   __hash__   s   zCacheablePageContent.__hash__N)r`   ra   r   r,   )rd   re   r   rf   )r   rj   )r3   r4   r5   r/   ri   rm   r&   r&   r&   r'   r_      s    

r_   c                   @  s   e Zd ZdddZdS )	
ParseLinksr`   ra   r   Iterable[Link]c                 C  s   d S r-   r&   rc   r&   r&   r'   __call__   s    zParseLinks.__call__Nr`   ra   r   ro   )r3   r4   r5   rp   r&   r&   r&   r'   rn      s    rn   fnc                   s2   t jd fddt  d fd	d
}|S )z
    Given a function that parses an Iterable[Link] from an IndexContent, cache the
    function's result (keyed by CacheablePageContent), unless the IndexContent
    `page` has `page.cache_link_parsing == False`.
    cacheable_pager_   r   
list[Link]c                   s   t  | jS r-   )listr`   )rs   )rr   r&   r'   wrapper   s   z*with_cached_index_content.<locals>.wrapperr`   ra   c                   s   | j r	t| S t | S r-   )rb   r_   ru   )r`   rr   rv   r&   r'   wrapper_wrapper   s   z2with_cached_index_content.<locals>.wrapper_wrapperN)rs   r_   r   rt   )r`   ra   r   rt   )	functoolscachewraps)rr   rx   r&   rw   r'   with_cached_index_content   s
   r|   r`   ra   ro   c           
      c  s    | j  }|dr+t| j}|dg D ]}t|| j	}|du r%q|V  qdS t
| j	}| jp4d}|| j| | j	}|jpE|}|jD ]}	tj|	||d}|du rXqI|V  qIdS )z\
    Parse a Simple API's Index Content, and yield its anchor elements as Link objects.
    r:   filesNzutf-8)page_urlbase_url)r*   r"   r#   jsonloadscontentr<   r   	from_jsonr   HTMLLinkParserencodingfeeddecoder   anchorsfrom_element)
r`   r?   datafilelinkparserr   r   r   anchorr&   r&   r'   parse_links   s*   





r   T)frozenc                   @  sH   e Zd ZU dZded< ded< ded< ded< d	Zd
ed< dddZdS )ra   a  Represents one response (or page), along with its URL.

    :param encoding: the encoding to decode the given content.
    :param url: the URL from which the HTML was downloaded.
    :param cache_link_parsing: whether links parsed from this page's url
                               should be cached. PyPI index urls should
                               have this set to False, for example.
    bytesr   r   r*   r    r   r   Trf   rb   r   c                 C  s
   t | jS r-   )r   r   rl   r&   r&   r'   __str__  s   
zIndexContent.__str__N)r   r   )r3   r4   r5   __doc____annotations__rb   r   r&   r&   r&   r'   ra      s   
 	c                      s6   e Zd ZdZd fddZdddZdddZ  ZS )r   zf
    HTMLParser that keeps the first base HREF and a list of all anchor
    elements' attributes.
    r   r   r   r,   c                   s$   t  jdd || _d | _g | _d S )NT)convert_charrefs)r.   r/   r   r   r   )r0   r   r1   r&   r'   r/     s   
zHTMLLinkParser.__init__tagattrslist[tuple[str, str | None]]c                 C  sR   |dkr| j d u r| |}|d ur|| _ d S d S |dkr'| jt| d S d S )Nbasea)r   get_hrefr   appenddict)r0   r   r   hrefr&   r&   r'   handle_starttag  s   

zHTMLLinkParser.handle_starttagr    c                 C  s"   |D ]\}}|dkr|  S qd S )Nr   r&   )r0   r   namevaluer&   r&   r'   r   !  s
   zHTMLLinkParser.get_href)r   r   r   r,   )r   r   r   r   r   r,   )r   r   r   r    )r3   r4   r5   r   r/   r   r   r6   r&   r&   r1   r'   r     s
    
r   r   r   reasonstr | ExceptionmethCallable[..., None] | Nonec                 C  s   |d u rt j}|d| | d S )Nz%Could not fetch URL %s: %s - skipping)rS   rT   )r   r   r   r&   r&   r'   _handle_get_simple_fail(  s   r   rb   rf   c                 C  s&   t | j}t| j| jd || j|dS )Nr8   )r   r   rb   )r^   r;   ra   r   r   )r7   rb   r   r&   r&   r'   _make_index_content2  s   
r   IndexContent | Nonec          
   
   C  s  | j ddd }t|}|rtd||  d S tj|\}}}}}}|dkrHtj	
tj|rH|ds;|d7 }tj|d}td| zt||d	}W n ty`   td
|  Y d S  ty| } ztd| |j|j W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zt| | W Y d }~d S d }~w ty } zd}	|	t|7 }	t| |	tjd W Y d }~d S d }~w tjy } zt| d|  W Y d }~d S d }~w tjy   t| d Y d S w t|| jdS )N#r   r   zICannot look at %s URL %s because it does not support lookup as web pages.r   /z
index.htmlz# file: URL is directory, getting %srP   z`Skipping page %s because it looks like an archive, and cannot be checked by a HTTP HEAD request.zSkipping page %s because the %s request got Content-Type: %s. The only supported Content-Types are application/vnd.pypi.simple.v1+json, application/vnd.pypi.simple.v1+html, and text/htmlz4There was a problem confirming the ssl certificate: )r   zconnection error: z	timed out)rb   ) r   splitr(   rS   warningrF   rG   urlparseosrK   isdirr=   url2pathnameendswithurljoinrT   rV   rA   r)   r+   r*   r   r   r   r   r   infor   ConnectionErrorTimeoutr   rb   )
r   rB   r   
vcs_schemer%   _rK   rN   excr   r&   r&   r'   _get_index_content?  sn   
r   c                   @  s   e Zd ZU ded< ded< dS )CollectedSourceszSequence[LinkSource | None]
find_links
index_urlsN)r3   r4   r5   r   r&   r&   r&   r'   r   |  s   
 r   c                   @  sN   e Zd ZdZd!dd	Ze	
d"d#ddZed$ddZd%ddZ	d&ddZ
d S )'LinkCollectorz
    Responsible for collecting Link objects from all configured locations,
    making network requests as needed.

    The class's main method is its collect_sources() method.
    rB   r   search_scoper   r   r,   c                 C  s   || _ || _d S r-   )r   rB   )r0   rB   r   r&   r&   r'   r/     s   
zLinkCollector.__init__Foptionsr   suppress_no_indexrf   c                 C  sd   |j g|j }|jr|stdddd |D  g }|jp g }tj|||jd}t	||d}|S )z
        :param session: The Session to use to make requests.
        :param suppress_no_index: Whether to ignore the --no-index option
            when constructing the SearchScope object.
        zIgnoring indexes: %s,c                 s  s    | ]}t |V  qd S r-   r   ).0r   r&   r&   r'   	<genexpr>  s    z'LinkCollector.create.<locals>.<genexpr>)r   r   no_index)rB   r   )
	index_urlextra_index_urlsr   rS   rT   rU   r   r   creater   )clsrB   r   r   r   r   r   link_collectorr&   r&   r'   r     s$   

zLinkCollector.create	list[str]c                 C  s   | j jS r-   )r   r   rl   r&   r&   r'   r     s   zLinkCollector.find_linkslocationr   r   c                 C  s   t || jdS )z>
        Fetch an HTML page containing package links.
        rP   )r   rB   )r0   r   r&   r&   r'   fetch_response  s   zLinkCollector.fetch_responseproject_namer   candidates_from_pager   r   c                   s   t  fddjD  }t  fddjD  }ttj	rKdd t
||D }t| d dg| }td| tt|t|d	S )
Nc              	   3  (    | ]}t | jjd d dV  qdS )Fr   page_validator
expand_dirrb   r   Nr   rB   is_secure_originr   locr   r   r0   r&   r'   r         	
z0LinkCollector.collect_sources.<locals>.<genexpr>c              	   3  r   )Tr   Nr   r   r   r&   r'   r     r   c                 S  s*   g | ]}|d ur|j d urd|j  qS )Nz* )r   )r   sr&   r&   r'   
<listcomp>  s
    
z1LinkCollector.collect_sources.<locals>.<listcomp>z' location(s) to search for versions of :
)r   r   )collectionsOrderedDictr   get_index_urls_locationsvaluesr   rS   isEnabledForloggingDEBUG	itertoolschainr$   rT   rU   r   ru   )r0   r   r   index_url_sourcesfind_links_sourceslinesr&   r   r'   collect_sources  s2   
	
	


zLinkCollector.collect_sourcesN)rB   r   r   r   r   r,   )F)rB   r   r   r   r   rf   r   r   )r   r   )r   r   r   r   )r   r   r   r   r   r   )r3   r4   r5   r   r/   classmethodr   propertyr   r   r   r&   r&   r&   r'   r     s    
!
r   )r   r   r   r    )r7   r   r   r,   )r   r   rB   r   r   r,   )r   r   rB   r   r   r   )r;   rW   r   r    )rr   rn   r   rn   rq   r-   )r   r   r   r   r   r   r   r,   )T)r7   r   rb   rf   r   ra   )r   r   rB   r   r   r   )Or   
__future__r   r   email.messagerY   ry   r   r   r   r   urllib.parserF   urllib.requestcollections.abcr   r   r   dataclassesr   html.parserr   optparser   typingr	   r
   r   pip._vendorr   pip._vendor.requestsr   pip._vendor.requests.exceptionsr   r   pip._internal.exceptionsr   pip._internal.models.linkr   !pip._internal.models.search_scoper   pip._internal.network.sessionr   pip._internal.network.utilsr   pip._internal.utils.filetypesr   pip._internal.utils.miscr   pip._internal.vcsr   sourcesr   r   r   	getLoggerr3   rS   r   rW   r(   	Exceptionr)   r@   rA   rO   rV   r^   r_   rn   r|   r   ra   r   r   r   r   r   r   r&   r&   r&   r'   <module>   sh    





?

=