Source code for bigframes.series

# Copyright 2023 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Series is a 1 dimensional data structure."""

from __future__ import annotations

import datetime
import functools
import inspect
import itertools
import numbers
import textwrap
import typing
from typing import (
    Any,
    Callable,
    cast,
    Iterable,
    List,
    Literal,
    Mapping,
    Optional,
    overload,
    Sequence,
    Tuple,
    Union,
)
import warnings

import bigframes_vendored.constants as constants
import bigframes_vendored.pandas.core.series as vendored_pandas_series
import google.cloud.bigquery as bigquery
import numpy
import pandas
from pandas.api import extensions as pd_ext
import pyarrow as pa
import typing_extensions

import bigframes.core
from bigframes.core import agg_expressions, groupby, log_adapter
import bigframes.core.block_transforms as block_ops
import bigframes.core.blocks as blocks
import bigframes.core.expression as ex
import bigframes.core.identifiers as ids
import bigframes.core.indexers
import bigframes.core.indexes as indexes
import bigframes.core.ordering as order
import bigframes.core.scalar as scalars
import bigframes.core.utils as utils
import bigframes.core.validations as validations
import bigframes.core.window
from bigframes.core.window import rolling
import bigframes.core.window_spec as windows
import bigframes.dataframe
import bigframes.dtypes
import bigframes.exceptions as bfe
import bigframes.formatting_helpers as formatter
import bigframes.functions
import bigframes.operations as ops
import bigframes.operations.aggregations as agg_ops
import bigframes.operations.blob as blob
import bigframes.operations.datetimes as dt
import bigframes.operations.lists as lists
import bigframes.operations.plotting as plotting
import bigframes.operations.python_op_maps as python_ops
import bigframes.operations.structs as structs
import bigframes.session

if typing.TYPE_CHECKING:
    import bigframes.geopandas.geoseries
    import bigframes.operations.strings as strings

LevelType = typing.Union[str, int]
LevelsType = typing.Union[LevelType, typing.Sequence[LevelType]]

_bigquery_function_recommendation_message = (
    "Your functions could not be applied directly to the Series."
    " Try converting it to a BigFrames BigQuery function."
)

_list = list  # Type alias to escape Series.list property

@log_adapter.class_logger
class Series(vendored_pandas_series.Series):
    # Must be above 5000 for pandas to delegate to bigframes for binops
    __pandas_priority__ = 13000

    # Ensure mypy can more robustly determine the type of self._block since it
    # gets set in various places.
    _block: blocks.Block

    def __init__(
        self,
        data=None,
        index=None,
        dtype: Optional[bigframes.dtypes.DtypeString | bigframes.dtypes.Dtype] = None,
        name: str | None = None,
        copy: Optional[bool] = None,
        *,
        session: Optional[bigframes.session.Session] = None,
    ):
        self._query_job: Optional[bigquery.QueryJob] = None

        import bigframes.pandas

        # Ignore object dtype if provided, as it provides no additional
        # information about what BigQuery type to use.
        if dtype is not None and bigframes.dtypes.is_object_like(dtype):
            dtype = None

        read_pandas_func = (
            session.read_pandas
            if (session is not None)
            else (lambda x: bigframes.pandas.read_pandas(x))
        )

        block: typing.Optional[blocks.Block] = None
        if (name is not None) and not isinstance(name, typing.Hashable):
            raise ValueError(
                f"BigQuery DataFrames only supports hashable series names. {constants.FEEDBACK_LINK}"
            )
        if copy is not None and not copy:
            raise ValueError(
                f"Series constructor only supports copy=True. {constants.FEEDBACK_LINK}"
            )

        if isinstance(data, blocks.Block):
            block = data
        elif isinstance(data, bigframes.pandas.Series):
            block = data._get_block()
        # special case where data is local scalar, but index is bigframes index (maybe very big)
        elif (
            not utils.is_list_like(data) and not isinstance(data, indexes.Index)
        ) and isinstance(index, indexes.Index):
            block = index._block
            block, _ = block.create_constant(data)
            block = block.with_column_labels([None])
            # prevents no-op reindex later
            index = None
        elif isinstance(data, indexes.Index) or isinstance(index, indexes.Index):
            data = indexes.Index(data, dtype=dtype, name=name, session=session)
            # set to none as it has already been applied, avoid re-cast later
            dtype = None
            if data.nlevels != 1:
                raise NotImplementedError("Cannot interpret multi-index as Series.")
            # Reset index to promote index columns to value columns, set default index
            data_block = data._block.reset_index(drop=False).with_column_labels(
                data.names
            )
            if index is not None:
                # Align data and index by offset
                bf_index = indexes.Index(index, session=session)
                idx_block = bf_index._block.reset_index(
                    drop=False
                )  # reset to align by offsets, and then reset back
                idx_cols = idx_block.value_columns
                data_block, (l_mapping, _) = idx_block.join(data_block, how="left")
                data_block = data_block.set_index([l_mapping[col] for col in idx_cols])
                data_block = data_block.with_index_labels(bf_index.names)
                # prevents no-op reindex later
                index = None
            block = data_block

        if block:
            assert len(block.value_columns) == 1
            assert len(block.column_labels) == 1
            if index is not None:
                # reindexing operation
                bf_index = indexes.Index(index)
                idx_block = bf_index._block
                idx_cols = idx_block.index_columns
                block, _ = idx_block.join(block, how="left")
                block = block.with_index_labels(bf_index.names)
            if name:
                block = block.with_column_labels([name])
            if dtype:
                bf_dtype = bigframes.dtypes.bigframes_type(dtype)
                block = block.multi_apply_unary_op(ops.AsTypeOp(to_type=bf_dtype))
        else:
            if isinstance(dtype, str) and dtype.lower() == "json":
                dtype = bigframes.dtypes.JSON_DTYPE
            pd_series = pandas.Series(
                data=data,
                index=index,  # type:ignore
                dtype=dtype,  # type:ignore
                name=name,
            )
            block = read_pandas_func(pd_series)._get_block()  # type:ignore

        assert block is not None
        self._block: blocks.Block = block
        self._block.session._register_object(self)

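    # Usage sketch (illustrative, not part of the module source): the
    # constructor accepts local data plus an optional name and dtype, and
    # assumes an authenticated BigQuery session configured through
    # `bigframes.pandas`.
    #
    #   import bigframes.pandas as bpd
    #
    #   s = bpd.Series([4, 3, 2], name="counts", dtype="Int64")
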
    @property
    def dt(self) -> dt.DatetimeMethods:
        return dt.DatetimeMethods(self)

    @property
    def dtype(self):
        bigframes.dtypes.warn_on_db_dtypes_json_dtype([self._dtype])
        return self._dtype

    @property
    def dtypes(self):
        bigframes.dtypes.warn_on_db_dtypes_json_dtype([self._dtype])
        return self._dtype

    @property
    def geo(self) -> bigframes.geopandas.geoseries.GeoSeries:
        """
        Accessor object for geography properties of the Series values.

        Returns:
            bigframes.geopandas.geoseries.GeoSeries:
                An accessor containing geography methods.
        """
        import bigframes.geopandas.geoseries

        return bigframes.geopandas.geoseries.GeoSeries(self)

    @property
    @validations.requires_index
    def loc(self) -> bigframes.core.indexers.LocSeriesIndexer:
        return bigframes.core.indexers.LocSeriesIndexer(self)

    @property
    @validations.requires_ordering()
    def iloc(self) -> bigframes.core.indexers.IlocSeriesIndexer:
        return bigframes.core.indexers.IlocSeriesIndexer(self)

    @property
    @validations.requires_ordering()
    def iat(self) -> bigframes.core.indexers.IatSeriesIndexer:
        return bigframes.core.indexers.IatSeriesIndexer(self)

    @property
    @validations.requires_index
    def at(self) -> bigframes.core.indexers.AtSeriesIndexer:
        return bigframes.core.indexers.AtSeriesIndexer(self)

    @property
    def name(self) -> blocks.Label:
        return self._name

    @name.setter
    def name(self, label: blocks.Label):
        new_block = self._block.with_column_labels([label])
        self._set_block(new_block)

    @property
    def shape(self) -> typing.Tuple[int]:
        return (self._block.shape[0],)

    @property
    def size(self) -> int:
        return self.shape[0]

    @property
    def ndim(self) -> int:
        return 1

    @property
    def empty(self) -> bool:
        return self.shape[0] == 0

    @property
    def hasnans(self) -> bool:
        # Note, hasnans is actually a null check, and NaNs don't count for nullable float
        return self.isnull().any()

    @property
    def values(self) -> numpy.ndarray:
        return self.to_numpy()

    @property
    @validations.requires_index
    def index(self) -> indexes.Index:
        return indexes.Index.from_frame(self)

    @validations.requires_index
    def keys(self) -> indexes.Index:
        return self.index

    @property
    def query_job(self) -> Optional[bigquery.QueryJob]:
        """BigQuery job metadata for the most recent query.

        Returns:
            The most recent `QueryJob
            <https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob>`_.
        """
        if self._query_job is None:
            self._set_internal_query_job(self._compute_dry_run())
        return self._query_job

    @property
    def struct(self) -> structs.StructAccessor:
        return structs.StructAccessor(self)

    @property
    def list(self) -> lists.ListAccessor:
        return lists.ListAccessor(self)

    @property
    def blob(self) -> blob.BlobAccessor:
        return blob.BlobAccessor(self)

    @property
    @validations.requires_ordering()
    def T(self) -> Series:
        return self.transpose()

    @property
    def _info_axis(self) -> indexes.Index:
        return self.index

    @property
    def _session(self) -> bigframes.Session:
        return self._get_block().expr.session

    @property
    def _struct_fields(self) -> List[str]:
        if not bigframes.dtypes.is_struct_like(self._dtype):
            return []

        struct_type = typing.cast(pa.StructType, self._dtype.pyarrow_dtype)
        return [struct_type.field(i).name for i in range(struct_type.num_fields)]

    @validations.requires_ordering()
    def transpose(self) -> Series:
        return self

    def _set_internal_query_job(self, query_job: Optional[bigquery.QueryJob]):
        self._query_job = query_job

    def __len__(self):
        return self.shape[0]

    __len__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__len__)

    def __iter__(self) -> typing.Iterator:
        return itertools.chain.from_iterable(
            map(lambda x: x.squeeze(axis=1), self._block.to_pandas_batches())
        )

    def __contains__(self, key) -> bool:
        return key in self.index

    def copy(self) -> Series:
        return Series(self._block)

    @overload
    def rename(
        self,
        index: Union[blocks.Label, Mapping[Any, Any]] = None,
    ) -> Series:
        ...

    @overload
    def rename(
        self,
        index: Union[blocks.Label, Mapping[Any, Any]] = None,
        *,
        inplace: Literal[False],
        **kwargs,
    ) -> Series:
        ...

    @overload
    def rename(
        self,
        index: Union[blocks.Label, Mapping[Any, Any]] = None,
        *,
        inplace: Literal[True],
        **kwargs,
    ) -> None:
        ...

    def rename(
        self,
        index: Union[blocks.Label, Mapping[Any, Any]] = None,
        *,
        inplace: bool = False,
        **kwargs,
    ) -> Optional[Series]:
        if len(kwargs) != 0:
            raise NotImplementedError(
                f"rename does not currently support any keyword arguments. {constants.FEEDBACK_LINK}"
            )

        # rename the index
        if isinstance(index, Mapping):
            index = typing.cast(Mapping[Any, Any], index)
            block = self._block
            for k, v in index.items():
                new_idx_ids = []
                for idx_id, idx_dtype in zip(block.index_columns, block.index.dtypes):
                    # Will throw if key type isn't compatible with index type, which leads to invalid SQL.
                    block.create_constant(k, dtype=idx_dtype)

                    # Will throw if value type isn't compatible with index type.
                    block, const_id = block.create_constant(v, dtype=idx_dtype)
                    block, cond_id = block.project_expr(
                        ops.ne_op.as_expr(idx_id, ex.const(k))
                    )
                    block, new_idx_id = block.apply_ternary_op(
                        idx_id, cond_id, const_id, ops.where_op
                    )

                    new_idx_ids.append(new_idx_id)
                    block = block.drop_columns([const_id, cond_id])

                block = block.set_index(new_idx_ids, index_labels=block.index.names)

            if inplace:
                self._block = block
                return None
            else:
                return Series(block)

        # rename the Series name
        if isinstance(index, typing.Hashable):
            # Python 3.9 doesn't allow isinstance of Optional
            index = typing.cast(Optional[str], index)
            block = self._block.with_column_labels([index])

            if inplace:
                self._block = block
                return None
            else:
                return Series(block)

        raise ValueError(f"Unsupported type of parameter index: {type(index)}")

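    # Usage sketch (illustrative, not part of the module source): `rename`
    # accepts either a new Series name or a mapping over index values, per the
    # branches above (the data shown here is hypothetical).
    #
    #   s.rename("total")                 # relabel the Series itself
    #   s.rename({0: 100}, inplace=True)  # remap index value 0 to 100
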
    @overload
    def rename_axis(
        self,
        mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
    ) -> Series:
        ...

    @overload
    def rename_axis(
        self,
        mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
        *,
        inplace: Literal[False],
        **kwargs,
    ) -> Series:
        ...

    @overload
    def rename_axis(
        self,
        mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
        *,
        inplace: Literal[True],
        **kwargs,
    ) -> None:
        ...

    @validations.requires_index
    def rename_axis(
        self,
        mapper: typing.Union[blocks.Label, typing.Sequence[blocks.Label]],
        *,
        inplace: bool = False,
        **kwargs,
    ) -> Optional[Series]:
        if len(kwargs) != 0:
            raise NotImplementedError(
                f"rename_axis does not currently support any keyword arguments. {constants.FEEDBACK_LINK}"
            )
        # limited implementation: the new index name is simply the 'mapper' parameter
        if _is_list_like(mapper):
            labels = mapper
        else:
            labels = [mapper]

        block = self._block.with_index_labels(labels)

        if inplace:
            self._block = block
            return None
        else:
            return Series(block)

    def equals(
        self, other: typing.Union[Series, bigframes.dataframe.DataFrame]
    ) -> bool:
        # Must be same object type, same column dtypes, and same label values
        if not isinstance(other, Series):
            return False
        return block_ops.equals(self._block, other._block)

    @overload  # type: ignore[override]
    def reset_index(
        self,
        level: blocks.LevelsType = ...,
        *,
        name: typing.Optional[str] = ...,
        drop: Literal[False] = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: Optional[bool] = ...,
    ) -> bigframes.dataframe.DataFrame:
        ...

    @overload
    def reset_index(
        self,
        level: blocks.LevelsType = ...,
        *,
        name: typing.Optional[str] = ...,
        drop: Literal[True] = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: Optional[bool] = ...,
    ) -> Series:
        ...

    @overload
    def reset_index(
        self,
        level: blocks.LevelsType = ...,
        *,
        name: typing.Optional[str] = ...,
        drop: bool = ...,
        inplace: Literal[True] = ...,
        allow_duplicates: Optional[bool] = ...,
    ) -> None:
        ...

    @validations.requires_ordering()
    def reset_index(
        self,
        level: blocks.LevelsType = None,
        *,
        name: typing.Optional[str] = None,
        drop: bool = False,
        inplace: bool = False,
        allow_duplicates: Optional[bool] = None,
    ) -> bigframes.dataframe.DataFrame | Series | None:
        if allow_duplicates is None:
            allow_duplicates = False
        block = self._block.reset_index(level, drop, allow_duplicates=allow_duplicates)
        if drop:
            if inplace:
                self._set_block(block)
                return None
            return Series(block)
        else:
            if inplace:
                raise ValueError(
                    "Series.reset_index cannot combine inplace=True and drop=False"
                )
            if name:
                block = block.assign_label(self._value_column, name)
            return bigframes.dataframe.DataFrame(block)

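    # Usage sketch (illustrative, not part of the module source): with
    # drop=False the index is promoted to a column and a DataFrame is
    # returned; drop=True keeps a Series with a default index.
    #
    #   df = s.reset_index(name="value")  # DataFrame with index as a column
    #   s2 = s.reset_index(drop=True)     # Series with default index
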
    def __repr__(self) -> str:
        # Protect against errors with uninitialized Series. See:
        # https://github.com/googleapis/python-bigquery-dataframes/issues/728
        if not hasattr(self, "_block"):
            return object.__repr__(self)

        # TODO(swast): Add a timeout here? If the query is taking a long time,
        # maybe we just print the job metadata that we have so far?
        # TODO(swast): Avoid downloading the whole series by using job
        # metadata, like we do with DataFrame.
        opts = bigframes.options.display
        max_results = opts.max_rows
        # anywidget mode uses the same display logic as the "deferred" mode
        # for faster execution
        if opts.repr_mode in ("deferred", "anywidget"):
            return formatter.repr_query_job(self._compute_dry_run())

        self._cached()
        pandas_df, _, query_job = self._block.retrieve_repr_request_results(max_results)
        self._set_internal_query_job(query_job)

        pd_series = pandas_df.iloc[:, 0]

        import pandas.io.formats

        # safe to mutate this, this dict is owned by this code, and does not affect global config
        to_string_kwargs = pandas.io.formats.format.get_series_repr_params()  # type: ignore
        if len(self._block.index_columns) == 0:
            to_string_kwargs.update({"index": False})
        repr_string = pd_series.to_string(**to_string_kwargs)

        return repr_string

    def astype(
        self,
        dtype: Union[bigframes.dtypes.DtypeString, bigframes.dtypes.Dtype],
        *,
        errors: Literal["raise", "null"] = "raise",
    ) -> Series:
        if errors not in ["raise", "null"]:
            raise ValueError("Argument 'errors' must be one of 'raise' or 'null'")
        dtype = bigframes.dtypes.bigframes_type(dtype)
        return self._apply_unary_op(
            bigframes.operations.AsTypeOp(to_type=dtype, safe=(errors == "null"))
        )

    def to_pandas(
        self,
        max_download_size: Optional[int] = None,
        sampling_method: Optional[str] = None,
        random_state: Optional[int] = None,
        *,
        ordered: bool = True,
        dry_run: bool = False,
        allow_large_results: Optional[bool] = None,
    ) -> pandas.Series:
        """Writes Series to pandas Series.

        **Examples:**

            >>> s = bpd.Series([4, 3, 2])

        Download the data from BigQuery and convert it into an in-memory pandas
        Series.

            >>> s.to_pandas()
            0    4
            1    3
            2    2
            dtype: Int64

        Estimate job statistics without processing or downloading data by using
        `dry_run=True`.

            >>> s.to_pandas(dry_run=True) # doctest: +SKIP
            columnCount                                                            1
            columnDtypes                                               {None: Int64}
            indexLevel                                                             1
            indexDtypes                                                      [Int64]
            projectId                                                  bigframes-dev
            location                                                              US
            jobType                                                            QUERY
            destinationTable       {'projectId': 'bigframes-dev', 'datasetId': '_...
            useLegacySql                                                       False
            referencedTables                                                    None
            totalBytesProcessed                                                    0
            cacheHit                                                           False
            statementType                                                     SELECT
            creationTime                            2025-04-03 18:54:59.219000+00:00
            dtype: object

        Args:
            max_download_size (int, default None):
                .. deprecated:: 2.0.0
                    ``max_download_size`` parameter is deprecated. Please use
                    ``to_pandas_batches()`` method instead.

                Download size threshold in MB. If ``max_download_size`` is
                exceeded when downloading data, the data will be downsampled
                if ``bigframes.options.sampling.enable_downsampling`` is
                ``True``, otherwise, an error will be raised. If set to a
                value other than ``None``, this will supersede the global
                config.
            sampling_method (str, default None):
                .. deprecated:: 2.0.0
                    ``sampling_method`` parameter is deprecated. Please use
                    ``sample()`` method instead.

                Downsampling algorithms to be chosen from, the choices are:
                "head": This algorithm returns a portion of the data from
                the beginning. It is fast and requires minimal computations
                to perform the downsampling; "uniform": This algorithm
                returns uniform random samples of the data. If set to a
                value other than None, this will supersede the global config.
            random_state (int, default None):
                .. deprecated:: 2.0.0
                    ``random_state`` parameter is deprecated. Please use
                    ``sample()`` method instead.

                The seed for the uniform downsampling algorithm. If provided,
                the uniform method may take longer to execute and require
                more computation. If set to a value other than None, this
                will supersede the global config.
            ordered (bool, default True):
                Determines whether the resulting pandas series will be ordered.
                In some cases, unordered may result in a faster-executing query.
            dry_run (bool, default False):
                If this argument is true, this method will not process the
                data. Instead, it returns a Pandas Series containing dry run
                job statistics.
            allow_large_results (bool, default None):
                If not None, overrides the global setting to allow or disallow
                large query results over the default size limit of 10 GB.

        Returns:
            pandas.Series:
                A pandas Series with all rows of this Series if the
                data_sampling_threshold_mb is not exceeded; otherwise, a
                pandas Series with downsampled rows of the DataFrame. If
                dry_run is set to True, a pandas Series containing dry run
                statistics will be returned.
        """
        if max_download_size is not None:
            msg = bfe.format_message(
                "DEPRECATED: The `max_download_size` parameters for `Series.to_pandas()` "
                "are deprecated and will be removed soon. Please use `Series.to_pandas_batches()`."
            )
            warnings.warn(msg, category=FutureWarning)
        if sampling_method is not None or random_state is not None:
            msg = bfe.format_message(
                "DEPRECATED: The `sampling_method` and `random_state` parameters for "
                "`Series.to_pandas()` are deprecated and will be removed soon. "
                "Please use `Series.sample().to_pandas()` instead for sampling."
            )
            warnings.warn(msg, category=FutureWarning)

        if dry_run:
            dry_run_stats, dry_run_job = self._block._compute_dry_run(
                max_download_size=max_download_size,
                sampling_method=sampling_method,
                random_state=random_state,
                ordered=ordered,
            )
            self._set_internal_query_job(dry_run_job)
            return dry_run_stats

        # Repeat the to_pandas() call to make mypy deduce type correctly,
        # because mypy cannot resolve Literal[True/False] to bool
        df, query_job = self._block.to_pandas(
            max_download_size=max_download_size,
            sampling_method=sampling_method,
            random_state=random_state,
            ordered=ordered,
            allow_large_results=allow_large_results,
        )
        if query_job:
            self._set_internal_query_job(query_job)
        series = df.squeeze(axis=1)
        series.name = self._name
        return series

    def to_pandas_batches(
        self,
        page_size: Optional[int] = None,
        max_results: Optional[int] = None,
        *,
        allow_large_results: Optional[bool] = None,
    ) -> Iterable[pandas.Series]:
        """Stream Series results to an iterable of pandas Series.

        page_size and max_results determine the size and number of batches,
        see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.job.QueryJob#google_cloud_bigquery_job_QueryJob_result

        **Examples:**

            >>> s = bpd.Series([4, 3, 2, 2, 3])

        Iterate through the results in batches, limiting the total rows yielded
        across all batches via `max_results`:

            >>> for s_batch in s.to_pandas_batches(max_results=3):
            ...     print(s_batch)
            0    4
            1    3
            2    2
            dtype: Int64

        Alternatively, control the approximate size of each batch using
        `page_size` and fetch batches manually using `next()`:

            >>> it = s.to_pandas_batches(page_size=2)
            >>> next(it)
            0    4
            1    3
            dtype: Int64
            >>> next(it)
            2    2
            3    2
            dtype: Int64

        Args:
            page_size (int, default None):
                The maximum number of rows of each batch. Non-positive values
                are ignored.
            max_results (int, default None):
                The maximum total number of rows of all batches.
            allow_large_results (bool, default None):
                If not None, overrides the global setting to allow or disallow
                large query results over the default size limit of 10 GB.

        Returns:
            Iterable[pandas.Series]:
                An iterable of smaller Series which combine to
                form the original Series. Results stream from bigquery,
                see https://cloud.google.com/python/docs/reference/bigquery/latest/google.cloud.bigquery.table.RowIterator#google_cloud_bigquery_table_RowIterator_to_arrow_iterable
        """
        batches = self._block.to_pandas_batches(
            page_size=page_size,
            max_results=max_results,
            allow_large_results=allow_large_results,
        )
        return map(lambda df: cast(pandas.Series, df.squeeze(1)), batches)

    def _compute_dry_run(self) -> bigquery.QueryJob:
        _, query_job = self._block._compute_dry_run((self._value_column,))
        return query_job

    def drop(
        self,
        labels: typing.Any = None,
        *,
        axis: typing.Union[int, str] = 0,
        index: typing.Any = None,
        columns: Union[blocks.Label, typing.Iterable[blocks.Label]] = None,
        level: typing.Optional[LevelType] = None,
    ) -> Series:
        if (labels is None) == (index is None):
            raise ValueError("Must specify exactly one of 'labels' or 'index'")

        if labels is not None:
            index = labels

        # ignore axis, columns params
        block = self._block
        level_id = self._resolve_levels(level or 0)[0]
        if _is_list_like(index):
            block, inverse_condition_id = block.apply_unary_op(
                level_id, ops.IsInOp(values=tuple(index), match_nulls=True)
            )
            block, condition_id = block.apply_unary_op(
                inverse_condition_id, ops.invert_op
            )
        else:
            block, condition_id = block.project_expr(
                ops.ne_op.as_expr(level_id, ex.const(index))
            )
        block = block.filter_by_id(condition_id, keep_null=True)
        block = block.drop_columns([condition_id])
        return Series(block.select_column(self._value_column))

    @validations.requires_index
    def droplevel(self, level: LevelsType, axis: int | str = 0):
        resolved_level_ids = self._resolve_levels(level)
        return Series(self._block.drop_levels(resolved_level_ids))

    @validations.requires_index
    def swaplevel(self, i: int = -2, j: int = -1):
        level_i = self._block.index_columns[i]
        level_j = self._block.index_columns[j]
        mapping = {level_i: level_j, level_j: level_i}
        reordering = [
            mapping.get(index_id, index_id) for index_id in self._block.index_columns
        ]
        return Series(self._block.reorder_levels(reordering))

    @validations.requires_index
    def reorder_levels(self, order: LevelsType, axis: int | str = 0):
        resolved_level_ids = self._resolve_levels(order)
        return Series(self._block.reorder_levels(resolved_level_ids))

    def _resolve_levels(self, level: LevelsType) -> typing.Sequence[str]:
        return self._block.index.resolve_level(level)

    def between(self, left, right, inclusive="both"):
        if inclusive not in ["both", "neither", "left", "right"]:
            raise ValueError(
                "Must set 'inclusive' to one of 'both', 'neither', 'left', or 'right'"
            )
        left_op = ops.ge_op if (inclusive in ["left", "both"]) else ops.gt_op
        right_op = ops.le_op if (inclusive in ["right", "both"]) else ops.lt_op
        return self._apply_binary_op(left, left_op).__and__(
            self._apply_binary_op(right, right_op)
        )

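    # Usage sketch (illustrative, not part of the module source): returns a
    # boolean mask of values within the bounds; `inclusive` may be "both",
    # "neither", "left", or "right".
    #
    #   mask = s.between(2, 4, inclusive="left")  # 2 <= x < 4
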
    def case_when(self, caselist) -> Series:
        cases = []
        for condition, output in itertools.chain(caselist, [(True, self)]):
            cases.append(condition)
            cases.append(output)

            # In pandas, the default value if no case matches is the original
            # value. This makes it impossible to change the type of the
            # column, but if the condition is always True, we know it will
            # match and no subsequent conditions matter (including the
            # fallback to `self`). This break allows the type to change
            # (see: internal issue 349926559).
            if condition is True:
                break

        return self._apply_nary_op(
            ops.case_when_op,
            cases,
            # Self is already included in "others".
            ignore_self=True,
        ).rename(self.name)

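    # Usage sketch (illustrative, not part of the module source): each
    # (condition, output) pair is evaluated in order, and unmatched rows keep
    # their original value (the masks here are hypothetical).
    #
    #   s.case_when([(s < 0, 0), (s > 100, 100)])
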
    @validations.requires_ordering()
    def cumsum(self) -> Series:
        return self._apply_window_op(agg_ops.sum_op, windows.cumulative_rows())

    @validations.requires_ordering()
    def ffill(self, *, limit: typing.Optional[int] = None) -> Series:
        window = windows.rows(start=None if limit is None else -limit, end=0)
        return self._apply_window_op(agg_ops.LastNonNullOp(), window)

    pad = ffill
    pad.__doc__ = inspect.getdoc(vendored_pandas_series.Series.ffill)

    @validations.requires_ordering()
    def bfill(self, *, limit: typing.Optional[int] = None) -> Series:
        window = windows.rows(start=0, end=limit)
        return self._apply_window_op(agg_ops.FirstNonNullOp(), window)

    @validations.requires_ordering()
    def cummax(self) -> Series:
        return self._apply_window_op(agg_ops.max_op, windows.cumulative_rows())

    @validations.requires_ordering()
    def cummin(self) -> Series:
        return self._apply_window_op(agg_ops.min_op, windows.cumulative_rows())

    @validations.requires_ordering()
    def cumprod(self) -> Series:
        return self._apply_window_op(agg_ops.product_op, windows.cumulative_rows())

    @validations.requires_ordering()
    def shift(self, periods: int = 1) -> Series:
        window_spec = windows.rows()
        return self._apply_window_op(agg_ops.ShiftOp(periods), window_spec)

    @validations.requires_ordering()
    def diff(self, periods: int = 1) -> Series:
        window_spec = windows.rows()
        return self._apply_window_op(agg_ops.DiffOp(periods), window_spec)

    @validations.requires_ordering()
    def pct_change(self, periods: int = 1) -> Series:
        # Future versions of pandas will not perform ffill automatically
        series = self.ffill()
        return Series(block_ops.pct_change(series._block, periods=periods))

    @validations.requires_ordering()
    def rank(
        self,
        axis=0,
        method: str = "average",
        numeric_only=False,
        na_option: str = "keep",
        ascending: bool = True,
        pct: bool = False,
    ) -> Series:
        return Series(
            block_ops.rank(self._block, method, na_option, ascending, pct=pct)
        )

    def fillna(self, value=None) -> Series:
        return self._apply_binary_op(value, ops.fillna_op)

    def replace(
        self, to_replace: typing.Any, value: typing.Any = None, *, regex: bool = False
    ):
        if regex:
            # No-op unless to_replace and series dtype are both string type
            if not isinstance(to_replace, str) or not isinstance(
                self.dtype, pandas.StringDtype
            ):
                return self
            return self._regex_replace(to_replace, value)
        elif utils.is_dict_like(to_replace):
            return self._mapping_replace(to_replace)  # type: ignore
        elif utils.is_list_like(to_replace):
            replace_list = to_replace
        else:  # Scalar
            replace_list = [to_replace]
        replace_list = [
            i for i in replace_list if bigframes.dtypes.is_compatible(i, self.dtype)
        ]
        return self._simple_replace(replace_list, value) if replace_list else self

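    # Usage sketch (illustrative, not part of the module source): scalar,
    # list, dict, and regex replacement forms, matching the branches above;
    # the regex form applies only to string-dtype Series.
    #
    #   s.replace(2, 20)                   # scalar -> scalar
    #   s.replace([1, 2], 0)               # list of values -> scalar
    #   s.replace({1: 10, 2: 20})          # dict mapping
    #   s.replace("^ab", "x", regex=True)  # string Series only
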
    def _regex_replace(self, to_replace: str, value: str):
        if not bigframes.dtypes.is_dtype(value, self.dtype):
            raise NotImplementedError(
                f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
            )
        block, result_col = self._block.apply_unary_op(
            self._value_column,
            ops.RegexReplaceStrOp(to_replace, value),
            result_label=self.name,
        )
        return Series(block.select_column(result_col))

    def _simple_replace(self, to_replace_list: typing.Sequence, value):
        result_type = bigframes.dtypes.is_compatible(value, self.dtype)
        if not result_type:
            raise NotImplementedError(
                f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
            )

        if result_type != self.dtype:
            return self.astype(result_type)._simple_replace(to_replace_list, value)

        block, cond = self._block.apply_unary_op(
            self._value_column, ops.IsInOp(tuple(to_replace_list))
        )
        block, result_col = block.project_expr(
            ops.where_op.as_expr(ex.const(value), cond, self._value_column), self.name
        )
        return Series(block.select_column(result_col))

    def _mapping_replace(self, mapping: dict[typing.Hashable, typing.Hashable]):
        if not mapping:
            return self.copy()
        tuples = []
        lcd_types: list[typing.Optional[bigframes.dtypes.Dtype]] = []
        for key, value in mapping.items():
            lcd_type = bigframes.dtypes.is_compatible(key, self.dtype)
            if not lcd_type:
                continue
            if not bigframes.dtypes.is_dtype(value, self.dtype):
                raise NotImplementedError(
                    f"Cannot replace {self.dtype} elements with incompatible item {value} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
                )
            tuples.append((key, value))
            lcd_types.append(lcd_type)

        result_dtype = functools.reduce(
            lambda t1, t2: bigframes.dtypes.lcd_type(t1, t2) if (t1 and t2) else None,
            lcd_types,
            self.dtype,
        )
        if not result_dtype:
            raise NotImplementedError(
                f"Cannot replace {self.dtype} elements with incompatible mapping {mapping} as mixed-type columns not supported. {constants.FEEDBACK_LINK}"
            )
        block, result = self._block.apply_unary_op(
            self._value_column, ops.MapOp(tuple(tuples))
        )
        replaced = Series(block.select_column(result))
        replaced.name = self.name
        return replaced

    @validations.requires_ordering()
    @validations.requires_index
    def interpolate(self, method: str = "linear") -> Series:
        if method == "pad":
            return self.ffill()
        result = block_ops.interpolate(self._block, method)
        return Series(result)

    def dropna(
        self,
        *,
        axis: int = 0,
        inplace: bool = False,
        how: typing.Optional[str] = None,
        ignore_index: bool = False,
    ) -> Series:
        if inplace:
            raise NotImplementedError("'inplace'=True not supported")
        result = block_ops.dropna(self._block, [self._value_column], how="any")
        if ignore_index:
            result = result.reset_index()
        return Series(result)

    @validations.requires_ordering(bigframes.constants.SUGGEST_PEEK_PREVIEW)
    def head(self, n: int = 5) -> Series:
        return typing.cast(Series, self.iloc[0:n])

    @validations.requires_ordering()
    def tail(self, n: int = 5) -> Series:
        return typing.cast(Series, self.iloc[-n:])

    def peek(
        self, n: int = 5, *, force: bool = True, allow_large_results=None
    ) -> pandas.Series:
        """
        Preview n arbitrary elements from the series without guarantees about
        row selection or ordering.

        ``Series.peek(force=False)`` will always be very fast, but will not
        succeed if data requires full data scanning. Using ``force=True`` will
        always succeed, but may perform queries. Query results will be cached
        so that future steps will benefit from these queries.

        Args:
            n (int, default 5):
                The number of rows to select from the series. Which N rows are
                returned is non-deterministic.
            force (bool, default True):
                If the data cannot be peeked efficiently, the series will
                instead be fully materialized as part of the operation if
                ``force=True``. If ``force=False``, the operation will throw a
                ValueError.
            allow_large_results (bool, default None):
                If not None, overrides the global setting to allow or disallow
                large query results over the default size limit of 10 GB.

        Returns:
            pandas.Series: A pandas Series with n rows.

        Raises:
            ValueError: If force=False and data cannot be efficiently peeked.
        """
        maybe_result = self._block.try_peek(n, allow_large_results=allow_large_results)
        if maybe_result is None:
            if force:
                self._cached()
                maybe_result = self._block.try_peek(
                    n, force=True, allow_large_results=allow_large_results
                )
                assert maybe_result is not None
            else:
                raise ValueError(
                    "Cannot peek efficiently when data has aggregates, joins or window functions applied. Use force=True to fully compute dataframe."
                )
        as_series = maybe_result.squeeze(axis=1)
        as_series.name = self.name
        return as_series

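    # Usage sketch (illustrative, not part of the module source): fetch a few
    # arbitrary rows without a full scan where possible; with force=False the
    # call raises instead of materializing.
    #
    #   preview = s.peek(3)
    #   preview = s.peek(3, force=False)  # may raise ValueError
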
    def item(self):
        # Docstring is in third_party/bigframes_vendored/pandas/core/series.py
        return self.peek(2).item()

    def nlargest(self, n: int = 5, keep: str = "first") -> Series:
        if keep not in ("first", "last", "all"):
            raise ValueError("'keep' must be one of 'first', 'last', or 'all'")
        if keep != "all":
            validations.enforce_ordered(self, "nlargest(keep != 'all')")
        return Series(
            block_ops.nlargest(self._block, n, [self._value_column], keep=keep)
        )

    def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
        if keep not in ("first", "last", "all"):
            raise ValueError("'keep' must be one of 'first', 'last', or 'all'")
        if keep != "all":
            validations.enforce_ordered(self, "nsmallest(keep != 'all')")
        return Series(
            block_ops.nsmallest(self._block, n, [self._value_column], keep=keep)
        )

    def isin(self, values) -> "Series":
        if isinstance(values, Series):
            return Series(self._block.isin(values._block))
        if isinstance(values, indexes.Index):
            return Series(self._block.isin(values.to_series()._block))
        if not _is_list_like(values):
            raise TypeError(
                "only list-like objects are allowed to be passed to "
                f"isin(), you passed a [{type(values).__name__}]"
            )

        return self._apply_unary_op(
            ops.IsInOp(values=tuple(values), match_nulls=True)
        ).fillna(value=False)

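    # Usage sketch (illustrative, not part of the module source): membership
    # test against a list, Index, or Series; the result is boolean with nulls
    # filled as False.
    #
    #   mask = s.isin([2, 3])
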
    def isna(self) -> "Series":
        return self._apply_unary_op(ops.isnull_op)

    isnull = isna
    isnull.__doc__ = inspect.getdoc(vendored_pandas_series.Series.isna)

    def notna(self) -> "Series":
        return self._apply_unary_op(ops.notnull_op)

    notnull = notna
    notnull.__doc__ = inspect.getdoc(vendored_pandas_series.Series.notna)

    def __and__(self, other: bool | int | Series) -> Series:
        return self._apply_binary_op(other, ops.and_op)

    __and__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__and__)

    __rand__ = __and__

    def __or__(self, other: bool | int | Series) -> Series:
        return self._apply_binary_op(other, ops.or_op)

    __or__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__or__)

    __ror__ = __or__

    def __xor__(self, other: bool | int | Series) -> Series:
        return self._apply_binary_op(other, ops.xor_op)

    __xor__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__xor__)

    __rxor__ = __xor__

    def __add__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.add(other)

    __add__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__add__)

    def __radd__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.radd(other)

    __radd__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__radd__)

    def add(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.add_op)

    def radd(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.add_op, reverse=True)

    def __sub__(self, other: float | int | Series) -> Series:
        return self.sub(other)

    __sub__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__sub__)

    def __rsub__(self, other: float | int | Series) -> Series:
        return self.rsub(other)

    __rsub__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rsub__)

    def sub(self, other) -> Series:
        return self._apply_binary_op(other, ops.sub_op)

    def rsub(self, other) -> Series:
        return self._apply_binary_op(other, ops.sub_op, reverse=True)

    subtract = sub
    subtract.__doc__ = inspect.getdoc(vendored_pandas_series.Series.sub)

    def __mul__(self, other: float | int | Series) -> Series:
        return self.mul(other)

    __mul__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__mul__)

    def __rmul__(self, other: float | int | Series) -> Series:
        return self.rmul(other)

    __rmul__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rmul__)

    def mul(self, other: float | int | Series) -> Series:
        return self._apply_binary_op(other, ops.mul_op)

    def rmul(self, other: float | int | Series) -> Series:
        return self._apply_binary_op(other, ops.mul_op, reverse=True)

    multiply = mul
    multiply.__doc__ = inspect.getdoc(vendored_pandas_series.Series.mul)

    def __truediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.truediv(other)

    __truediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__truediv__)

    def __rtruediv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.rtruediv(other)

    __rtruediv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rtruediv__)

    def truediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.div_op)

    def rtruediv(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.div_op, reverse=True)

    truediv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.truediv)
    div = divide = truediv

    rdiv = rtruediv
    rdiv.__doc__ = inspect.getdoc(vendored_pandas_series.Series.rtruediv)

    def __floordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.floordiv(other)

    __floordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__floordiv__)

    def __rfloordiv__(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self.rfloordiv(other)

    __rfloordiv__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rfloordiv__)

    def floordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.floordiv_op)

    def rfloordiv(self, other: float | int | pandas.Timedelta | Series) -> Series:
        return self._apply_binary_op(other, ops.floordiv_op, reverse=True)

    def __pow__(self, other: float | int | Series) -> Series:
        return self.pow(other)

    __pow__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__pow__)

    def __rpow__(self, other: float | int | Series) -> Series:
        return self.rpow(other)

    __rpow__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rpow__)

    def pow(self, other: float | int | Series) -> Series:
        return self._apply_binary_op(other, ops.pow_op)

    def rpow(self, other: float | int | Series) -> Series:
        return self._apply_binary_op(other, ops.pow_op, reverse=True)

    def __lt__(self, other: float | int | str | Series) -> Series:
        return self.lt(other)

    def __le__(self, other: float | int | str | Series) -> Series:
        return self.le(other)

    def lt(self, other) -> Series:
        return self._apply_binary_op(other, ops.lt_op)

    def le(self, other) -> Series:
        return self._apply_binary_op(other, ops.le_op)

    def __gt__(self, other: float | int | str | Series) -> Series:
        return self.gt(other)

    def __ge__(self, other: float | int | str | Series) -> Series:
        return self.ge(other)

    def gt(self, other) -> Series:
        return self._apply_binary_op(other, ops.gt_op)

    def ge(self, other) -> Series:
        return self._apply_binary_op(other, ops.ge_op)

    def __mod__(self, other) -> Series:  # type: ignore
        return self.mod(other)

    __mod__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__mod__)

    def __rmod__(self, other) -> Series:  # type: ignore
        return self.rmod(other)

    __rmod__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rmod__)

    def mod(self, other) -> Series:  # type: ignore
        return self._apply_binary_op(other, ops.mod_op)

    def rmod(self, other) -> Series:  # type: ignore
        return self._apply_binary_op(other, ops.mod_op, reverse=True)

    def divmod(self, other) -> Tuple[Series, Series]:  # type: ignore
        # TODO(huanc): when self and other both have dtype int and other
        # contains zeros, the output should be dtype float; currently both
        # floordiv and mod return dtype int in this case.
        return (self.floordiv(other), self.mod(other))

    def rdivmod(self, other) -> Tuple[Series, Series]:  # type: ignore
        # TODO(huanc): when self and other both have dtype int and self
        # contains zeros, the output should be dtype float; currently both
        # floordiv and mod return dtype int in this case.
        return (self.rfloordiv(other), self.rmod(other))

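    # Usage sketch (illustrative, not part of the module source): divmod
    # returns the (floordiv, mod) pair as two Series.
    #
    #   q, r = s.divmod(3)
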
    def dot(self, other):
        return (self * other).sum()

    def __matmul__(self, other):
        return self.dot(other)

    __matmul__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__matmul__)

    def __rmatmul__(self, other):
        return self.dot(other)

    __rmatmul__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__rmatmul__)

    def combine_first(self, other: Series) -> Series:
        result = self._apply_binary_op(other, ops.coalesce_op)
        result.name = self.name
        return result

    def update(self, other: Union[Series, Sequence, Mapping]) -> None:
        result = self._apply_binary_op(
            other, ops.coalesce_op, reverse=True, alignment="left"
        )
        self._set_block(result._get_block())

    def __abs__(self) -> Series:
        return self.abs()

    __abs__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.abs)

    def abs(self) -> Series:
        return self._apply_unary_op(ops.abs_op)

    def round(self, decimals=0) -> "Series":
        return self._apply_binary_op(decimals, ops.round_op)

    def corr(self, other: Series, method="pearson", min_periods=None) -> float:
        # TODO(tbergeron): Validate early that both are numeric
        # TODO(tbergeron): Handle partially-numeric columns
        if method != "pearson":
            raise NotImplementedError(
                f"Only Pearson correlation is currently supported. {constants.FEEDBACK_LINK}"
            )
        if min_periods:
            raise NotImplementedError(
                f"min_periods not yet supported. {constants.FEEDBACK_LINK}"
            )
        return self._apply_binary_aggregation(other, agg_ops.CorrOp())

    def autocorr(self, lag: int = 1) -> float:
        return self.corr(self.shift(lag))

    def cov(self, other: Series) -> float:
        return self._apply_binary_aggregation(other, agg_ops.CovOp())

    def all(self) -> bool:
        return typing.cast(bool, self._apply_aggregation(agg_ops.all_op))

    def any(self) -> bool:
        return typing.cast(bool, self._apply_aggregation(agg_ops.any_op))

    def count(self) -> int:
        return typing.cast(int, self._apply_aggregation(agg_ops.count_op))

    def nunique(self) -> int:
        return typing.cast(int, self._apply_aggregation(agg_ops.nunique_op))

    def max(self) -> scalars.Scalar:
        return self._apply_aggregation(agg_ops.max_op)

    def min(self) -> scalars.Scalar:
        return self._apply_aggregation(agg_ops.min_op)

    def std(self) -> float:
        return typing.cast(float, self._apply_aggregation(agg_ops.std_op))

    def var(self) -> float:
        return typing.cast(float, self._apply_aggregation(agg_ops.var_op))

    def _central_moment(self, n: int) -> float:
        """Useful helper for calculating central moment statistics"""
        # Nth central moment is mean((x-mean(x))^n)
        # See: https://en.wikipedia.org/wiki/Moment_(mathematics)
        mean_deltas = self - self.mean()
        delta_powers = mean_deltas**n
        return delta_powers.mean()

    def agg(self, func: str | typing.Sequence[str]) -> scalars.Scalar | Series:
        if _is_list_like(func):
            if self.dtype not in bigframes.dtypes.NUMERIC_BIGFRAMES_TYPES_PERMISSIVE:
                raise NotImplementedError(
                    f"Multiple aggregations only supported on numeric series. {constants.FEEDBACK_LINK}"
                )
            aggregations = [agg_ops.lookup_agg_func(f)[0] for f in func]
            return Series(
                self._block.summarize(
                    [self._value_column],
                    aggregations,
                )
            )
        else:
            return self._apply_aggregation(agg_ops.lookup_agg_func(func)[0])

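    # Usage sketch (illustrative, not part of the module source): a single
    # aggregation name returns a scalar, while a list of names returns a
    # Series of results (the list form requires a numeric dtype, per above).
    #
    #   s.agg("sum")            # scalar
    #   s.agg(["min", "max"])   # Series of results
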
    aggregate = agg
    aggregate.__doc__ = inspect.getdoc(vendored_pandas_series.Series.agg)

    def describe(self) -> Series:
        from bigframes.pandas.core.methods import describe

        return cast(Series, describe.describe(self, include="all"))

    def skew(self):
        count = self.count()
        if count < 3:
            return pandas.NA

        moment3 = self._central_moment(3)
        moment2 = self.var() * (count - 1) / count  # Convert sample var to pop var

        # See G1 estimator:
        # https://en.wikipedia.org/wiki/Skewness#Sample_skewness
        numerator = moment3
        denominator = moment2 ** (3 / 2)
        adjustment = (count * (count - 1)) ** 0.5 / (count - 2)

        return (numerator / denominator) * adjustment

    def kurt(self):
        count = self.count()
        if count < 4:
            return pandas.NA

        moment4 = self._central_moment(4)
        moment2 = self.var() * (count - 1) / count  # Convert sample var to pop var

        # Kurtosis is often defined as the second standardized moment:
        # moment(4)/moment(2)**2. Pandas however uses Fisher's estimator,
        # implemented below.
        numerator = (count + 1) * (count - 1) * moment4
        denominator = (count - 2) * (count - 3) * moment2**2
        adjustment = 3 * (count - 1) ** 2 / ((count - 2) * (count - 3))

        return (numerator / denominator) - adjustment

    kurtosis = kurt
    kurtosis.__doc__ = inspect.getdoc(vendored_pandas_series.Series.kurt)

    def mode(self) -> Series:
        block = self._block
        # Approach: Count each value, return each value for which count(x) == max(counts))
        block = block.aggregate(
            by_column_ids=[self._value_column],
            aggregations=(
                agg_expressions.UnaryAggregation(
                    agg_ops.count_op, ex.deref(self._value_column)
                ),
            ),
        )
        value_count_col_id = block.value_columns[0]
        block, max_value_count_col_id = block.apply_window_op(
            value_count_col_id,
            agg_ops.max_op,
            window_spec=windows.unbound(),
        )
        block, is_mode_col_id = block.apply_binary_op(
            value_count_col_id,
            max_value_count_col_id,
            ops.eq_op,
        )
        block = block.filter_by_id(is_mode_col_id)
        # use temporary name for reset_index to avoid collision, restore after dropping extra columns
        block = (
            block.with_index_labels(["mode_temp_internal"])
            .order_by([order.ascending_over(self._value_column)])
            .reset_index(drop=False)
        )
        block = block.select_column(self._value_column).with_column_labels([self.name])
        mode_values_series = Series(block.select_column(self._value_column))
        return typing.cast(Series, mode_values_series)

    def mean(self) -> float:
        return typing.cast(float, self._apply_aggregation(agg_ops.mean_op))

    def median(self, *, exact: bool = True) -> float:
        if exact:
            return typing.cast(float, self.quantile(0.5))
        else:
            return typing.cast(float, self._apply_aggregation(agg_ops.median_op))

    def quantile(self, q: Union[float, Sequence[float]] = 0.5) -> Union[Series, float]:
        qs = tuple(q) if utils.is_list_like(q) else (q,)
        result = block_ops.quantile(self._block, (self._value_column,), qs=qs)
        if utils.is_list_like(q):
            # Drop the first level, since only one column
            result = result.with_column_labels(result.column_labels.droplevel(0))
            result, index_col = result.create_constant(self.name, None)
            result = result.set_index([index_col])
            return Series(
                result.transpose(original_row_index=pandas.Index([self.name]))
            )
        else:
            return cast(float, Series(result).to_pandas().squeeze())

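    # Usage sketch (illustrative, not part of the module source): a scalar q
    # returns a float; a sequence of quantiles returns a Series indexed by q.
    #
    #   s.quantile(0.5)           # float
    #   s.quantile([0.25, 0.75])  # Series
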
    def sum(self) -> float:
        return typing.cast(float, self._apply_aggregation(agg_ops.sum_op))

    def prod(self) -> float:
        return typing.cast(float, self._apply_aggregation(agg_ops.product_op))

    product = prod
    product.__doc__ = inspect.getdoc(vendored_pandas_series.Series.prod)

    def __eq__(self, other: object) -> Series:  # type: ignore
        return self.eq(other)

    def __ne__(self, other: object) -> Series:  # type: ignore
        return self.ne(other)

    def __invert__(self) -> Series:
        return self._apply_unary_op(ops.invert_op)

    __invert__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__invert__)

    def __pos__(self) -> Series:
        return self._apply_unary_op(ops.pos_op)

    def __neg__(self) -> Series:
        return self._apply_unary_op(ops.neg_op)

    def __dir__(self) -> List[str]:
        return dir(type(self)) + self._struct_fields

    def eq(self, other: object) -> Series:
        # TODO: enforce stricter alignment
        return self._apply_binary_op(other, ops.eq_op)

    def ne(self, other: object) -> Series:
        # TODO: enforce stricter alignment
        return self._apply_binary_op(other, ops.ne_op)

    def items(self):
        for batch_df in self._block.to_pandas_batches():
            assert (
                batch_df.shape[1] == 1
            ), f"Expected 1 column in the dataframe, but got {batch_df.shape[1]}."
            for item in batch_df.squeeze(axis=1).items():
                yield item

    def _apply_callable(self, condition):
        """Executes the possible callable condition as needed."""
        if callable(condition):
            # When it's a bigframes function.
            if hasattr(condition, "bigframes_bigquery_function"):
                return self.apply(condition)
            # When it's a plain Python function.
            else:
                return self.apply(condition, by_row=False)

        # When it's not a callable.
        return condition

    def where(self, cond, other=None):
        cond = self._apply_callable(cond)
        other = self._apply_callable(other)

        value_id, cond_id, other_id, block = self._align3(cond, other)
        block, result_id = block.project_expr(
            ops.where_op.as_expr(value_id, cond_id, other_id)
        )
        return Series(block.select_column(result_id).with_column_labels([self.name]))

    def clip(self, lower=None, upper=None):
        if lower is None and upper is None:
            return self
        if lower is None:
            return self._apply_binary_op(upper, ops.minimum_op, alignment="left")
        if upper is None:
            return self._apply_binary_op(lower, ops.maximum_op, alignment="left")
        # special rule to coerce scalar string args to date
        value_id, lower_id, upper_id, block = self._align3(
            lower, upper, cast_scalars=(bigframes.dtypes.is_date_like(self.dtype))
        )
        block, result_id = block.project_expr(
            ops.clip_op.as_expr(value_id, lower_id, upper_id),
        )
        return Series(block.select_column(result_id).with_column_labels([self.name]))

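    # Usage sketch (illustrative, not part of the module source): bound
    # values from one or both sides; a missing bound leaves that side
    # unclipped.
    #
    #   s.clip(lower=0)  # floor at 0
    #   s.clip(0, 10)    # restrict to [0, 10]
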
    @validations.requires_ordering()
    def argmax(self) -> int:
        block, row_nums = self._block.promote_offsets()
        block = block.order_by(
            [
                order.descending_over(self._value_column),
                order.ascending_over(row_nums),
            ]
        )
        return typing.cast(
            scalars.Scalar, Series(block.select_column(row_nums)).iloc[0]
        )

    @validations.requires_ordering()
    def argmin(self) -> int:
        block, row_nums = self._block.promote_offsets()
        block = block.order_by(
            [
                order.ascending_over(self._value_column),
                order.ascending_over(row_nums),
            ]
        )
        return typing.cast(
            scalars.Scalar, Series(block.select_column(row_nums)).iloc[0]
        )

    @validations.requires_index
    def unstack(self, level: LevelsType = -1):
        if isinstance(level, int) or isinstance(level, str):
            level = [level]

        block = self._block

        if self.index.nlevels == 1:
            raise ValueError("Series must have multi-index to unstack")

        # Pivot by index levels
        unstack_ids = self._resolve_levels(level)
        block = block.reset_index(drop=False)
        block = block.set_index(
            [col for col in self._block.index_columns if col not in unstack_ids]
        )

        pivot_block = block.pivot(
            columns=unstack_ids,
            values=self._block.value_columns,
            values_in_index=False,
        )
        return bigframes.dataframe.DataFrame(pivot_block)

    @validations.requires_index
    def idxmax(self) -> blocks.Label:
        block = self._block.order_by(
            [
                order.descending_over(self._value_column),
                *[
                    order.ascending_over(idx_col)
                    for idx_col in self._block.index_columns
                ],
            ]
        )
        block = block.slice(0, 1)
        return indexes.Index(block).to_pandas()[0]

    @validations.requires_index
    def idxmin(self) -> blocks.Label:
        block = self._block.order_by(
            [
                order.ascending_over(self._value_column),
                *[
                    order.ascending_over(idx_col)
                    for idx_col in self._block.index_columns
                ],
            ]
        )
        block = block.slice(0, 1)
        return indexes.Index(block).to_pandas()[0]

    @property
    @validations.requires_ordering()
    def is_monotonic_increasing(self) -> bool:
        return typing.cast(
            bool, self._block.is_monotonic_increasing(self._value_column)
        )

    @property
    @validations.requires_ordering()
    def is_monotonic_decreasing(self) -> bool:
        return typing.cast(
            bool, self._block.is_monotonic_decreasing(self._value_column)
        )

    def __getitem__(self, indexer):
        # TODO: enforce stricter alignment, should fail if indexer is missing any keys.
        use_iloc = (
            isinstance(indexer, slice)
            and all(
                isinstance(x, numbers.Integral) or (x is None)
                for x in [indexer.start, indexer.stop, indexer.step]
            )
        ) or (
            isinstance(indexer, numbers.Integral)
            and not isinstance(self._block.index.dtypes[0], pandas.Int64Dtype)
        )
        if use_iloc:
            return self.iloc[indexer]
        if isinstance(indexer, Series):
            (left, right, block) = self._align(indexer, "left")
            block = block.filter(right)
            block = block.select_column(left.id.name)
            return Series(block)
        return self.loc[indexer]

    __getitem__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__getitem__)

    def __getattr__(self, key: str):
        # Protect against recursion errors with uninitialized Series objects.
        # We use "_block" attribute to check whether the instance is initialized.
        # See:
        # https://github.com/googleapis/python-bigquery-dataframes/issues/728
        # and
        # https://nedbatchelder.com/blog/201010/surprising_getattr_recursion.html
        if key == "_block":
            raise AttributeError(key)
        elif hasattr(pandas.Series, key):
            log_adapter.submit_pandas_labels(
                self._block.session.bqclient, self.__class__.__name__, key
            )
            raise AttributeError(
                textwrap.dedent(
                    f"""
                    BigQuery DataFrames has not yet implemented an equivalent to
                    'pandas.Series.{key}'. {constants.FEEDBACK_LINK}
                    """
                )
            )
        elif key in self._struct_fields:
            return self.struct.field(key)
        else:
            raise AttributeError(key)

    def __setitem__(self, key, value) -> None:
        """Set item using direct assignment, delegating to .loc indexer."""
        self.loc[key] = value

    def _apply_aggregation(
        self, op: agg_ops.UnaryAggregateOp | agg_ops.NullaryAggregateOp
    ) -> Any:
        return self._block.get_stat(self._value_column, op)

    def _apply_window_op(
        self, op: agg_ops.UnaryWindowOp, window_spec: windows.WindowSpec
    ):
        block = self._block
        block, result_id = block.apply_window_op(
            self._value_column, op, window_spec=window_spec, result_label=self.name
        )
        result = Series(block.select_column(result_id))
        if op.skips_nulls:
            return result.where(self.notna(), None)
        else:
            return result

    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        *,
        dropna: bool = True,
    ):
        block = block_ops.value_counts(
            self._block,
            [self._value_column],
            normalize=normalize,
            ascending=ascending,
            drop_na=dropna,
        )
        return Series(block)

    @typing.overload  # type: ignore[override]
    def sort_values(
        self,
        *,
        axis=...,
        inplace: Literal[True] = ...,
        ascending: bool | typing.Sequence[bool] = ...,
        kind: str = ...,
        na_position: typing.Literal["first", "last"] = ...,
    ) -> None:
        ...

    @typing.overload
    def sort_values(
        self,
        *,
        axis=...,
        inplace: Literal[False] = ...,
        ascending: bool | typing.Sequence[bool] = ...,
        kind: str = ...,
        na_position: typing.Literal["first", "last"] = ...,
    ) -> Series:
        ...

    def sort_values(
        self,
        *,
        axis=0,
        inplace: bool = False,
        ascending=True,
        kind: str = "quicksort",
        na_position: typing.Literal["first", "last"] = "last",
    ) -> Optional[Series]:
        if axis != 0 and axis != "index":
            raise ValueError(f"No axis named {axis} for object type Series")
        if na_position not in ["first", "last"]:
            raise ValueError("Param na_position must be one of 'first' or 'last'")
        block = self._block.order_by(
            [
                order.ascending_over(self._value_column, (na_position == "last"))
                if ascending
                else order.descending_over(self._value_column, (na_position == "last"))
            ],
        )
        if inplace:
            self._set_block(block)
            return None
        else:
            return Series(block)

    @typing.overload  # type: ignore[override]
    def sort_index(
        self, *, axis=..., inplace: Literal[False] = ..., ascending=..., na_position=...
    ) -> Series:
        ...

    @typing.overload
    def sort_index(
        self, *, axis=0, inplace: Literal[True] = ..., ascending=..., na_position=...
    ) -> None:
        ...

    @validations.requires_index
    def sort_index(
        self, *, axis=0, inplace: bool = False, ascending=True, na_position="last"
    ) -> Optional[Series]:
        # TODO(tbergeron): Support level parameter once multi-index introduced.
        if axis != 0 and axis != "index":
            raise ValueError(f"No axis named {axis} for object type Series")
        if na_position not in ["first", "last"]:
            raise ValueError("Param na_position must be one of 'first' or 'last'")
        block = self._block
        na_last = na_position == "last"
        ordering = [
            order.ascending_over(column, na_last)
            if ascending
            else order.descending_over(column, na_last)
            for column in block.index_columns
        ]
        block = block.order_by(ordering)
        if inplace:
            self._set_block(block)
            return None
        else:
            return Series(block)

    @validations.requires_ordering()
    def rolling(
        self,
        window: int | pandas.Timedelta | numpy.timedelta64 | datetime.timedelta | str,
        min_periods: int | None = None,
        closed: Literal["right", "left", "both", "neither"] = "right",
    ) -> bigframes.core.window.Window:
        if isinstance(window, int):
            # Rows rolling
            window_spec = windows.WindowSpec(
                bounds=windows.RowsWindowBounds.from_window_size(window, closed),
                min_periods=window if min_periods is None else min_periods,
            )
            return bigframes.core.window.Window(
                self._block, window_spec, self._block.value_columns, is_series=True
            )

        return rolling.create_range_window(
            block=self._block,
            window=window,
            min_periods=min_periods,
            closed=closed,
            is_series=True,
        )

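    # Usage sketch (illustrative, not part of the module source): an integer
    # window counts rows; a timedelta or string window takes the range-window
    # path above (time-based). The aggregation calls shown are assumptions
    # about the returned Window object.
    #
    #   s.rolling(window=3).sum()
    #   s.rolling(window=pandas.Timedelta("1h"), min_periods=1).mean()
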
    @validations.requires_ordering()
    def expanding(self, min_periods: int = 1) -> bigframes.core.window.Window:
        window_spec = windows.cumulative_rows(min_periods=min_periods)
        return bigframes.core.window.Window(
            self._block, window_spec, self._block.value_columns, is_series=True
        )

    def groupby(
        self,
        by: typing.Union[
            blocks.Label, Series, typing.Sequence[typing.Union[blocks.Label, Series]]
        ] = None,
        axis=0,
        level: typing.Optional[
            int | str | typing.Sequence[int] | typing.Sequence[str]
        ] = None,
        as_index: bool = True,
        *,
        dropna: bool = True,
    ) -> bigframes.core.groupby.SeriesGroupBy:
        if (by is not None) and (level is not None):
            raise ValueError("Do not specify both 'by' and 'level'")
        if not as_index:
            raise ValueError("as_index=False only valid with DataFrame")
        if axis:
            raise ValueError("No axis named {} for object type Series".format(axis))
        if by is not None:
            return self._groupby_values(by, dropna)
        if level is not None:
            return self._groupby_level(level, dropna)
        else:
            raise TypeError("You have to supply one of 'by' and 'level'")

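    # Usage sketch (illustrative, not part of the module source): group by
    # another Series or by an index level (the keys here are hypothetical).
    #
    #   s.groupby(other_series).sum()
    #   s.groupby(level=0).mean()
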
    @validations.requires_index
    def _groupby_level(
        self,
        level: int | str | typing.Sequence[int] | typing.Sequence[str],
        dropna: bool = True,
    ) -> bigframes.core.groupby.SeriesGroupBy:
        if utils.is_list_like(level):
            by_key_is_singular = False
        else:
            by_key_is_singular = True

        return groupby.SeriesGroupBy(
            self._block,
            self._value_column,
            by_col_ids=self._resolve_levels(level),
            value_name=self.name,
            dropna=dropna,
            by_key_is_singular=by_key_is_singular,
        )

    def _groupby_values(
        self,
        by: typing.Union[
            blocks.Label, Series, typing.Sequence[typing.Union[blocks.Label, Series]]
        ],
        dropna: bool = True,
    ) -> bigframes.core.groupby.SeriesGroupBy:
        if not isinstance(by, Series) and _is_list_like(by):
            by = list(by)
            by_key_is_singular = False
        else:
            by = [typing.cast(typing.Union[blocks.Label, Series], by)]
            by_key_is_singular = True

        block = self._block
        grouping_cols: typing.Sequence[str] = []
        value_col = self._value_column
        for key in by:
            if isinstance(key, Series):
                block, (
                    get_column_left,
                    get_column_right,
                ) = block.join(key._block, how="inner" if dropna else "left")

                value_col = get_column_left[value_col]
                grouping_cols = [
                    *[get_column_left[value] for value in grouping_cols],
                    get_column_right[key._value_column],
                ]
            else:
                # Interpret as index level
                matches = block.index_name_to_col_id.get(key, [])
                if len(matches) != 1:
                    raise ValueError(
                        f"GroupBy key {key} does not match a unique index level. BigQuery DataFrames only interprets lists of strings as index level names, not directly as per-row group assignments."
                    )
                grouping_cols = [*grouping_cols, matches[0]]

        return groupby.SeriesGroupBy(
            block,
            value_col,
            by_col_ids=grouping_cols,
            value_name=self.name,
            dropna=dropna,
            by_key_is_singular=by_key_is_singular,
        )

    def apply(
        self,
        func,
        by_row: typing.Union[typing.Literal["compat"], bool] = "compat",
        *,
        args: typing.Tuple = (),
    ) -> Series:
        # Note: This signature differs from pandas.Series.apply. Specifically,
        # `args` is keyword-only and `by_row` is a custom parameter here. Full
        # alignment would involve breaking changes. However, given that by_row
        # is not frequently used, we defer any such changes until there is a
        # clear need based on user feedback.
        #
        # See pandas docs for reference:
        # https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.apply.html

        # TODO(shobs, b/274645634): Support convert_dtype, **kwargs
        # is actually a ternary op

        if by_row not in ["compat", False]:
            raise ValueError("Param by_row must be one of 'compat' or False")

        if not callable(func) and not isinstance(func, numpy.ufunc):
            raise ValueError(
                "Only a ufunc (a function that applies to the entire Series)"
                " or a BigFrames BigQuery function that only works on single values"
                " are supported."
            )

        if isinstance(func, bigframes.functions.BigqueryCallableRoutine):
            # We are working with bigquery function at this point
            if args:
                result_series = self._apply_nary_op(
                    ops.NaryRemoteFunctionOp(function_def=func.udf_def), args
                )
                # TODO(jialuo): Investigate why `_apply_nary_op` drops the series
                # `name`. Manually reassigning it here as a temporary fix.
                result_series.name = self.name
            else:
                result_series = self._apply_unary_op(
                    ops.RemoteFunctionOp(function_def=func.udf_def, apply_on_null=True)
                )
            result_series = func._post_process_series(result_series)
            return result_series

        bf_op = python_ops.python_callable_to_op(func)
        if bf_op and isinstance(bf_op, ops.UnaryOp):
            return self._apply_unary_op(bf_op)

        # It is neither a remote function nor a managed function.
        # Then it must be a vectorized function that applies to the Series
        # as a whole.
        if by_row:
            raise ValueError(
                "You have passed a function as-is. If your intention is to "
                "apply this function in a vectorized way (i.e. to the "
                "entire Series as a whole, and you are sure that it "
                "performs only the operations that are implemented for a "
                "Series (e.g. a chain of arithmetic/logical operations, "
                "such as `def foo(s): return s % 2 == 1`), please also "
                "specify `by_row=False`. If your function contains "
                "arbitrary code, it can only be applied to every element "
                "in the Series individually, in which case you must "
                "convert it to a BigFrames BigQuery function using "
                "`bigframes.pandas.udf`, "
                "or `bigframes.pandas.remote_function` before passing."
            )

        try:
            return func(self)  # type: ignore
        except Exception as ex:
            # This could happen if any of the operators in func is not
            # supported on a Series. Let's guide the customer to use a
            # bigquery function instead
            if hasattr(ex, "message"):
                ex.message += f"\n{_bigquery_function_recommendation_message}"
            raise

    def combine(
        self,
        other,
        func,
    ) -> Series:
        if not callable(func) and not isinstance(func, numpy.ufunc):
            raise ValueError(
                "Only a ufunc (a function that applies to the entire Series)"
                " or a BigFrames BigQuery function that only works on single"
                " values are supported."
            )

        if isinstance(func, bigframes.functions.BigqueryCallableRoutine):
            result_series = self._apply_binary_op(
                other, ops.BinaryRemoteFunctionOp(function_def=func.udf_def)
            )
            result_series = func._post_process_series(result_series)
            return result_series

        bf_op = python_ops.python_callable_to_op(func)
        if bf_op and isinstance(bf_op, ops.BinaryOp):
            result_series = self._apply_binary_op(other, bf_op)
            return result_series

        # Keep this in sync with .apply
        try:
            return func(self, other)
        except Exception as ex:
            # This could happen if any of the operators in func is not
            # supported on a Series. Let's guide the customer to use a
            # BigQuery function instead.
            if hasattr(ex, "message"):
                ex.message += f"\n{_bigquery_function_recommendation_message}"
            raise
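    # Illustrative sketch (not part of the source): `combine` mirrors the
    # `apply` dispatch for two operands.
    #
    #     s1.combine(s2, my_binary_routine)   # hypothetical BigQuery routine
    #     s1.combine(s2, lambda a, b: a + b)  # falls through to func(self, other)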
    @validations.requires_index
    def add_prefix(self, prefix: str, axis: int | str | None = None) -> Series:
        return Series(self._get_block().add_prefix(prefix))

    @validations.requires_index
    def add_suffix(self, suffix: str, axis: int | str | None = None) -> Series:
        return Series(self._get_block().add_suffix(suffix))

    def take(
        self, indices: typing.Sequence[int], axis: int | str | None = 0, **kwargs
    ) -> Series:
        if not utils.is_list_like(indices):
            raise ValueError("indices should be a list-like object.")
        return typing.cast(Series, self.iloc[indices])

    def filter(
        self,
        items: typing.Optional[typing.Iterable] = None,
        like: typing.Optional[str] = None,
        regex: typing.Optional[str] = None,
        axis: typing.Optional[typing.Union[str, int]] = None,
    ) -> Series:
        if (axis is not None) and utils.get_axis_number(axis) != 0:
            raise ValueError(f"Invalid axis for series: {axis}")
        if sum([(items is not None), (like is not None), (regex is not None)]) != 1:
            raise ValueError(
                "Need to provide exactly one of 'items', 'like', or 'regex'"
            )
        if len(self._block.index_columns) > 1:
            raise NotImplementedError(
                f"Method filter does not support rows multiindex. {constants.FEEDBACK_LINK}"
            )
        if (like is not None) or (regex is not None):
            block = self._block
            block, label_string_id = block.apply_unary_op(
                self._block.index_columns[0],
                ops.AsTypeOp(to_type=pandas.StringDtype(storage="pyarrow")),
            )
            if like is not None:
                block, mask_id = block.apply_unary_op(
                    label_string_id, ops.StrContainsOp(pat=like)
                )
            else:  # regex
                assert regex is not None
                block, mask_id = block.apply_unary_op(
                    label_string_id, ops.StrContainsRegexOp(pat=regex)
                )
            block = block.filter_by_id(mask_id)
            block = block.select_columns([self._value_column])
            return Series(block)
        elif items is not None:
            # Behavior matches pandas 2.1+; older pandas versions would reindex.
            block = self._block
            block, mask_id = block.apply_unary_op(
                self._block.index_columns[0], ops.IsInOp(values=tuple(items))
            )
            block = block.filter_by_id(mask_id)
            block = block.select_columns([self._value_column])
            return Series(block)
        else:
            raise ValueError("Need to provide 'items', 'like', or 'regex'")
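    # Illustrative examples (hypothetical labels): exactly one selector is
    # allowed, and all three filter on the single-level index.
    #
    #     s.filter(items=["a", "b"])  # keep labels in the given set
    #     s.filter(like="bc")         # keep labels containing "bc"
    #     s.filter(regex="^a")        # keep labels matching the pattern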
    @validations.requires_index
    def reindex(self, index=None, *, validate: typing.Optional[bool] = None):
        if validate and not self.index.is_unique:
            raise ValueError("Original index must be unique to reindex")
        keep_original_names = False
        if isinstance(index, indexes.Index):
            new_indexer = bigframes.dataframe.DataFrame(data=index._block)[[]]
        else:
            if not isinstance(index, pandas.Index):
                keep_original_names = True
                index = pandas.Index(index)
            if index.nlevels != self.index.nlevels:
                raise NotImplementedError(
                    "Cannot reindex with index with different nlevels"
                )
            new_indexer = bigframes.dataframe.DataFrame(
                index=index, session=self._get_block().expr.session
            )[[]]
        # Multiindex join is sensitive to index names, so set all of these
        result = new_indexer.rename_axis(range(new_indexer.index.nlevels)).join(
            self.to_frame().rename_axis(range(self.index.nlevels)),
            how="left",
        )
        # and then reset the names after the join
        result_block = result.rename_axis(
            self.index.names if keep_original_names else index.names
        )._block
        return Series(result_block)
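    # Illustrative example (hypothetical data): reindex conforms the Series to
    # the new labels via a left join, nulling labels missing from the original.
    #
    #     s = bpd.Series([1, 2], index=["a", "b"])  # bpd: bigframes.pandas
    #     s.reindex(["b", "c"])                     # "b" -> 2, "c" -> <NA>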
    @validations.requires_index
    def reindex_like(self, other: Series, *, validate: typing.Optional[bool] = None):
        return self.reindex(other.index, validate=validate)

    def drop_duplicates(self, *, keep: str = "first") -> Series:
        block = block_ops.drop_duplicates(self._block, (self._value_column,), keep)
        return Series(block)

    def unique(self, keep_order=True) -> Series:
        if keep_order:
            validations.enforce_ordered(self, "unique(keep_order != False)")
            return self.drop_duplicates()
        block = self._block.aggregate(
            [
                agg_expressions.UnaryAggregation(
                    agg_ops.AnyValueOp(), ex.deref(self._value_column)
                )
            ],
            [self._value_column],
            column_labels=self._block.column_labels,
            dropna=False,
        )
        return Series(block.reset_index())
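    # Illustrative note: with keep_order=True (the default) `unique` is an
    # ordered drop_duplicates and requires an ordered session; keep_order=False
    # compiles to a plain aggregation over the value column and drops that
    # requirement.
    #
    #     s.unique()                  # preserves encounter order
    #     s.unique(keep_order=False)  # unordered aggregation path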
    def duplicated(self, keep: str = "first") -> Series:
        block, indicator = block_ops.indicate_duplicates(
            self._block, (self._value_column,), keep
        )
        return Series(block.select_column(indicator).with_column_labels([self.name]))

    def mask(self, cond, other=None) -> Series:
        cond = self._apply_callable(cond)
        other = self._apply_callable(other)
        if not isinstance(cond, Series):
            raise TypeError(
                f"Only bigframes series condition is supported, received {type(cond).__name__}. "
                f"{constants.FEEDBACK_LINK}"
            )
        return self.where(~cond, other)
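    # Illustrative examples: `mask` is the complement of `where`, replacing
    # values where the condition holds (hypothetical data).
    #
    #     s.mask(s > 2)            # values greater than 2 become missing
    #     s.mask(s > 2, other=0)   # ...or become 0
    #     s.mask(lambda x: x > 2)  # callables are resolved via _apply_callable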
    def to_frame(self, name: blocks.Label = None) -> bigframes.dataframe.DataFrame:
        provided_name = name if name else self.name
        # To be consistent with pandas, assign 0 as the column name if none is
        # provided (0 is the first element of a RangeIndex).
        block = self._block.with_column_labels(
            [provided_name] if provided_name else [0]
        )
        return bigframes.dataframe.DataFrame(block)

    def to_csv(
        self,
        path_or_buf=None,
        sep=",",
        *,
        header: bool = True,
        index: bool = True,
        allow_large_results: Optional[bool] = None,
    ) -> Optional[str]:
        if utils.is_gcs_path(path_or_buf):
            return self.to_frame().to_csv(
                path_or_buf,
                sep=sep,
                header=header,
                index=index,
                allow_large_results=allow_large_results,
            )
        else:
            pd_series = self.to_pandas(allow_large_results=allow_large_results)
            return pd_series.to_csv(
                path_or_buf=path_or_buf, sep=sep, header=header, index=index
            )
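    # Illustrative note: a `gs://` destination routes through the DataFrame
    # export path and is written server-side; any other destination is
    # materialized locally through pandas first.
    #
    #     s.to_csv("gs://my-bucket/out/*.csv")  # hypothetical bucket
    #     csv_text = s.to_csv()                 # returns the CSV as a string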
    def to_dict(
        self,
        into: type[dict] = dict,
        *,
        allow_large_results: Optional[bool] = None,
    ) -> typing.Mapping:
        return typing.cast(
            dict,
            self.to_pandas(allow_large_results=allow_large_results).to_dict(into=into),  # type: ignore
        )

    def to_excel(
        self, excel_writer, sheet_name="Sheet1", *, allow_large_results=None, **kwargs
    ) -> None:
        return self.to_pandas(allow_large_results=allow_large_results).to_excel(
            excel_writer, sheet_name, **kwargs
        )

    def to_json(
        self,
        path_or_buf=None,
        orient: Optional[
            typing.Literal["split", "records", "index", "columns", "values", "table"]
        ] = None,
        *,
        lines: bool = False,
        index: bool = True,
        allow_large_results: Optional[bool] = None,
    ) -> Optional[str]:
        if utils.is_gcs_path(path_or_buf):
            return self.to_frame().to_json(
                path_or_buf=path_or_buf,
                orient=orient,
                lines=lines,
                index=index,
                allow_large_results=allow_large_results,
            )
        else:
            pd_series = self.to_pandas(allow_large_results=allow_large_results)
            return pd_series.to_json(
                path_or_buf=path_or_buf, orient=orient, lines=lines, index=index  # type: ignore
            )

    def to_latex(
        self,
        buf=None,
        columns=None,
        header=True,
        index=True,
        *,
        allow_large_results=None,
        **kwargs,
    ) -> typing.Optional[str]:
        return self.to_pandas(allow_large_results=allow_large_results).to_latex(
            buf, columns=columns, header=header, index=index, **kwargs
        )

    def tolist(
        self,
        *,
        allow_large_results: Optional[bool] = None,
    ) -> _list:
        return self.to_pandas(allow_large_results=allow_large_results).to_list()

    to_list = tolist
    to_list.__doc__ = inspect.getdoc(vendored_pandas_series.Series.tolist)
    def to_markdown(
        self,
        buf: typing.IO[str] | None = None,
        mode: str = "wt",
        index: bool = True,
        *,
        allow_large_results: Optional[bool] = None,
        **kwargs,
    ) -> typing.Optional[str]:
        return self.to_pandas(allow_large_results=allow_large_results).to_markdown(
            buf, mode=mode, index=index, **kwargs
        )  # type: ignore

    def to_numpy(
        self,
        dtype=None,
        copy=False,
        na_value=pd_ext.no_default,
        *,
        allow_large_results=None,
        **kwargs,
    ) -> numpy.ndarray:
        return self.to_pandas(allow_large_results=allow_large_results).to_numpy(
            dtype, copy, na_value, **kwargs
        )

    def __array__(self, dtype=None, copy: Optional[bool] = None) -> numpy.ndarray:
        if copy is False:
            raise ValueError("Cannot convert to array without copy.")
        return self.to_numpy(dtype=dtype)

    __array__.__doc__ = inspect.getdoc(vendored_pandas_series.Series.__array__)

    def to_pickle(self, path, *, allow_large_results=None, **kwargs) -> None:
        return self.to_pandas(allow_large_results=allow_large_results).to_pickle(
            path, **kwargs
        )
    def to_string(
        self,
        buf=None,
        na_rep="NaN",
        float_format=None,
        header=True,
        index=True,
        length=False,
        dtype=False,
        name=False,
        max_rows=None,
        min_rows=None,
        *,
        allow_large_results=None,
    ) -> typing.Optional[str]:
        return self.to_pandas(allow_large_results=allow_large_results).to_string(
            buf,
            na_rep,
            float_format,
            header,
            index,
            length,
            dtype,
            name,
            max_rows,
            min_rows,
        )

    def to_xarray(
        self,
        *,
        allow_large_results: Optional[bool] = None,
    ):
        return self.to_pandas(allow_large_results=allow_large_results).to_xarray()

    def _throw_if_index_contains_duplicates(
        self, error_message: typing.Optional[str] = None
    ) -> None:
        if not self.index.is_unique:
            error_message = (
                error_message
                if error_message
                else "Index contains duplicate entries, but uniqueness is required."
            )
            raise pandas.errors.InvalidIndexError(error_message)
    def map(
        self,
        arg: typing.Union[Mapping, Series, Callable],
        na_action: Optional[str] = None,
        *,
        verify_integrity: bool = False,
    ) -> Series:
        if na_action:
            raise NotImplementedError(
                f"Non-None na_action argument is not yet supported for Series.map. {constants.FEEDBACK_LINK}"
            )
        if isinstance(arg, Series):
            if verify_integrity:
                error_message = (
                    "When verify_integrity is True in Series.map, the index of"
                    " the arg parameter must not have duplicate entries."
                )
                arg._throw_if_index_contains_duplicates(error_message=error_message)
            map_df = bigframes.dataframe.DataFrame(arg._block)
            map_df = map_df.rename(columns={arg.name: self.name})
        elif isinstance(arg, Mapping):
            map_df = bigframes.dataframe.DataFrame(
                {"keys": list(arg.keys()), self.name: list(arg.values())},  # type: ignore
                session=self._get_block().expr.session,
            )
            map_df = map_df.set_index("keys")
        elif callable(arg):
            # This path handles remote functions and managed functions.
            return self.apply(arg)
        else:
            # Mirroring pandas, call the uncallable object
            arg()  # throws TypeError: object is not callable
        self_df = self.to_frame(name="series")
        result_df = self_df.join(map_df, on="series")
        return result_df[self.name]
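    # Illustrative examples (hypothetical data): dict and Series arguments are
    # both implemented as a join against a lookup frame.
    #
    #     s.map({1: "one", 2: "two"})                 # dict -> lookup frame -> join
    #     s.map(other_series)                         # join on other_series's index
    #     s.map(other_series, verify_integrity=True)  # raise on duplicate lookup keys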
    @validations.requires_ordering()
    def sample(
        self,
        n: Optional[int] = None,
        frac: Optional[float] = None,
        *,
        random_state: Optional[int] = None,
        sort: Optional[bool | Literal["random"]] = "random",
    ) -> Series:
        if n is not None and frac is not None:
            raise ValueError("Only one of 'n' or 'frac' parameter can be specified.")

        ns = (n,) if n is not None else ()
        fracs = (frac,) if frac is not None else ()
        return Series(
            self._block.split(ns=ns, fracs=fracs, random_state=random_state, sort=sort)[
                0
            ]
        )
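    # Illustrative examples: exactly one of `n` or `frac` may be given; both
    # delegate to Block.split, with `sort` controlling the order of the result
    # ("random" by default).
    #
    #     s.sample(n=100, random_state=42)
    #     s.sample(frac=0.1)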
    def explode(self, *, ignore_index: Optional[bool] = False) -> Series:
        return Series(
            self._block.explode(
                column_ids=[self._value_column], ignore_index=ignore_index
            )
        )

    @validations.requires_ordering()
    def resample(
        self,
        rule: str,
        *,
        closed: Optional[Literal["right", "left"]] = None,
        label: Optional[Literal["right", "left"]] = None,
        level: Optional[LevelsType] = None,
        origin: Union[
            Union[pandas.Timestamp, datetime.datetime, numpy.datetime64, int, float, str],
            Literal["epoch", "start", "start_day", "end", "end_day"],
        ] = "start_day",
    ) -> bigframes.core.groupby.SeriesGroupBy:
        block = self._block._generate_resample_label(
            rule=rule,
            closed=closed,
            label=label,
            on=None,
            level=level,
            origin=origin,
        )
        series = Series(block)
        return series.groupby(level=0)
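    # Illustrative example (assumes a datetime-like index; rule strings shown
    # are hypothetical pandas-style offsets): resampling generates a bucket
    # label column, then groups by it.
    #
    #     s.resample("1D").sum()   # daily buckets
    #     s.resample("3h").mean()  # hourly-multiple buckets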
    def __array_ufunc__(
        self, ufunc: numpy.ufunc, method: str, *inputs, **kwargs
    ) -> Series:
        """Used to support numpy ufuncs.
        See: https://numpy.org/doc/stable/reference/ufuncs.html
        """
        # Only __call__ is supported, with at most two inputs and no kwargs.
        if method != "__call__" or len(inputs) > 2 or len(kwargs) > 0:
            return NotImplemented

        if len(inputs) == 1 and ufunc in ops.NUMPY_TO_OP:
            return self._apply_unary_op(ops.NUMPY_TO_OP[ufunc])
        if len(inputs) == 2 and ufunc in ops.NUMPY_TO_BINOP:
            binop = ops.NUMPY_TO_BINOP[ufunc]
            if inputs[0] is self:
                return self._apply_binary_op(inputs[1], binop)
            else:
                return self._apply_binary_op(inputs[0], binop, reverse=True)

        return NotImplemented

    @property
    def plot(self):
        return plotting.PlotAccessor(self)
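    # Illustrative note on `__array_ufunc__` above: numpy ufuncs with a known
    # mapping dispatch back into deferred BigFrames ops instead of
    # materializing the data.
    #
    #     np.abs(s)     # unary, if `np.abs` is present in ops.NUMPY_TO_OP
    #     np.add(s, 1)  # binary via ops.NUMPY_TO_BINOP
    #     np.add(1, s)  # same binop, applied with reverse=True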
    def hist(
        self, by: typing.Optional[typing.Sequence[str]] = None, bins: int = 10, **kwargs
    ):
        return self.plot.hist(by=by, bins=bins, **kwargs)

    hist.__doc__ = inspect.getdoc(plotting.PlotAccessor.hist)

    def line(
        self,
        x: typing.Optional[typing.Hashable] = None,
        y: typing.Optional[typing.Hashable] = None,
        **kwargs,
    ):
        return self.plot.line(x=x, y=y, **kwargs)

    line.__doc__ = inspect.getdoc(plotting.PlotAccessor.line)

    def area(
        self,
        x: typing.Optional[typing.Hashable] = None,
        y: typing.Optional[typing.Hashable] = None,
        stacked: bool = True,
        **kwargs,
    ):
        return self.plot.area(x=x, y=y, stacked=stacked, **kwargs)

    area.__doc__ = inspect.getdoc(plotting.PlotAccessor.area)

    def bar(
        self,
        x: typing.Optional[typing.Hashable] = None,
        y: typing.Optional[typing.Hashable] = None,
        **kwargs,
    ):
        return self.plot.bar(x=x, y=y, **kwargs)

    bar.__doc__ = inspect.getdoc(plotting.PlotAccessor.bar)

    def _slice(
        self,
        start: typing.Optional[int] = None,
        stop: typing.Optional[int] = None,
        step: typing.Optional[int] = None,
    ) -> Series:
        return Series(
            self._block.slice(
                start=start, stop=stop, step=step if (step is not None) else 1
            ).select_column(self._value_column),
        )
    def cache(self):
        """
        Materializes the Series to a temporary table.

        Useful if the series will be used multiple times, as this will avoid
        recomputing the shared intermediate value.

        Returns:
            Series: Self
        """
        # Do not use session-aware caching if caching was user-requested.
        return self._cached(force=True, session_aware=False)
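    # Illustrative usage: cache once before fanning out into several
    # downstream computations so the shared prefix is only computed once.
    #
    #     s = expensive_series.cache()    # hypothetical; materializes a temp table
    #     total, avg = s.sum(), s.mean()  # both read the cached result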
    def _cached(self, *, force: bool = True, session_aware: bool = True) -> Series:
        self._block.cached(force=force, session_aware=session_aware)
        return self

    # Keep this at the bottom of the Series class to avoid confusing the type
    # checker by overriding str
    @property
    def str(self) -> strings.StringMethods:
        import bigframes.operations.strings as strings

        return strings.StringMethods(self)

    @property
    def _value_column(self) -> __builtins__.str:
        return self._block.value_columns[0]

    @property
    def _name(self) -> blocks.Label:
        return self._block.column_labels[0]

    @property
    def _dtype(self):
        return self._block.dtypes[0]

    def _set_block(self, block: blocks.Block):
        self._block = block

    def _get_block(self) -> blocks.Block:
        return self._block

    def _apply_unary_op(
        self,
        op: ops.UnaryOp,
    ) -> Series:
        """Applies a unary operator to the series."""
        block, result_id = self._block.apply_unary_op(
            self._value_column,
            op,
        )
        return Series(block.select_column(result_id), name=self.name)  # type: ignore

    def _apply_binary_op(
        self,
        other: typing.Any,
        op: ops.BinaryOp,
        alignment: typing.Literal["outer", "left"] = "outer",
        reverse: bool = False,
    ) -> Series:
        """Applies a binary operator to the series and other."""
        if bigframes.core.convert.can_convert_to_series(other):
            self_index = indexes.Index(self._block)
            other_series = bigframes.core.convert.to_bf_series(
                other, self_index, self._block.session
            )
            (self_col, other_col, block) = self._align(other_series, how=alignment)

            name = self._name
            # Drop the name if both objects have a name attr but they don't match
            if (
                hasattr(other, "name")
                and other_series.name != self._name
                and alignment == "outer"
            ):
                name = None

            expr = op.as_expr(
                other_col if reverse else self_col,
                self_col if reverse else other_col,
            )
            block, result_id = block.project_expr(expr)
            block = block.select_column(result_id).with_column_labels([name])
            return Series(block)  # type: ignore
        else:  # Scalar binop
            name = self._name
            expr = op.as_expr(
                ex.const(other) if reverse else self._value_column,
                self._value_column if reverse else ex.const(other),
            )
            block, result_id = self._block.project_expr(expr)
            block = block.select_column(result_id).with_column_labels([name])
            return Series(block)  # type: ignore

    def _apply_nary_op(
        self,
        op: ops.NaryOp,
        others: Sequence[typing.Union[Series, scalars.Scalar]],
        ignore_self=False,
    ):
        """Applies an n-ary operator to the series and others."""
        values, block = self._align_n(
            others, ignore_self=ignore_self, cast_scalars=False
        )
        block, result_id = block.project_expr(op.as_expr(*values))
        return Series(block.select_column(result_id))

    def _apply_binary_aggregation(
        self, other: Series, stat: agg_ops.BinaryAggregateOp
    ) -> float:
        (left, right, block) = self._align(other, how="outer")
        assert isinstance(left, ex.DerefOp)
        assert isinstance(right, ex.DerefOp)
        return block.get_binary_stat(left.id.name, right.id.name, stat)

    AlignedExprT = Union[ex.ScalarConstantExpression, ex.DerefOp]

    @typing.overload
    def _align(
        self, other: Series, how="outer"
    ) -> tuple[ex.DerefOp, ex.DerefOp, blocks.Block]:
        ...

    @typing.overload
    def _align(
        self, other: typing.Union[Series, scalars.Scalar], how="outer"
    ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block]:
        ...

    def _align(
        self, other: typing.Union[Series, scalars.Scalar], how="outer"
    ) -> tuple[ex.DerefOp, AlignedExprT, blocks.Block]:
        """Aligns the series value with another scalar or series object.

        Returns the new left column id, right column id, and joined table
        expression.
        """
        values, block = self._align_n(
            [other],
            how,
        )
        return (typing.cast(ex.DerefOp, values[0]), values[1], block)

    def _align3(
        self,
        other1: Series | scalars.Scalar,
        other2: Series | scalars.Scalar,
        how="left",
        cast_scalars: bool = True,
    ) -> tuple[ex.DerefOp, AlignedExprT, AlignedExprT, blocks.Block]:  # type: ignore
        """Aligns the series value with two other scalars or series objects.

        Returns the new values and joined table expression.
        """
        values, index = self._align_n([other1, other2], how, cast_scalars=cast_scalars)
        return (
            typing.cast(ex.DerefOp, values[0]),
            values[1],
            values[2],
            index,
        )

    def _align_n(
        self,
        others: typing.Sequence[typing.Union[Series, scalars.Scalar]],
        how="outer",
        ignore_self=False,
        cast_scalars: bool = False,
    ) -> tuple[
        typing.Sequence[Union[ex.ScalarConstantExpression, ex.DerefOp]],
        blocks.Block,
    ]:
        if ignore_self:
            value_ids: List[Union[ex.ScalarConstantExpression, ex.DerefOp]] = []
        else:
            value_ids = [ex.deref(self._value_column)]

        block = self._block
        for other in others:
            if isinstance(other, Series):
                block, (
                    get_column_left,
                    get_column_right,
                ) = block.join(other._block, how=how)
                rebindings = {
                    ids.ColumnId(old): ids.ColumnId(new)
                    for old, new in get_column_left.items()
                }
                remapped_value_ids = (
                    value.remap_column_refs(rebindings) for value in value_ids
                )
                value_ids = [
                    *remapped_value_ids,  # type: ignore
                    ex.deref(get_column_right[other._value_column]),
                ]
            else:
                # Will throw if we can't interpret the value as a scalar.
                dtype = typing.cast(bigframes.dtypes.Dtype, self._dtype)
                value_ids = [
                    *value_ids,
                    ex.const(other, dtype=dtype if cast_scalars else None),
                ]
        return (value_ids, block)

    def _throw_if_null_index(self, opname: __builtins__.str):
        if len(self._block.index_columns) == 0:
            raise bigframes.exceptions.NullIndexError(
                f"Series cannot perform {opname} as it has no index. Set an index using set_index."
            )
def _is_list_like(obj: typing.Any) -> typing_extensions.TypeGuard[typing.Sequence]:
    return pandas.api.types.is_list_like(obj)