# Iterate a DataFrame three ways: by column, by row, and by row tuple.
import pandas as pd

df = pd.DataFrame([[1, 2, 3], [4, 5, 6]], columns=['a', 'b', 'c'], index=['A', 'B'])
df.columns = ['x', 'y', 'z']  # replace the column labels

# Iterate by columns, then over the values inside each column's Series.
for col in df:  # iterating the frame itself yields the column labels
    print(col)
    print(df[col].dtypes)
    for val in df[col]:
        print(val)

# Iterate by rows: iterrows() yields (index label, row as a Series).
for row_index, row in df.iterrows():
    print(row_index, row)

# Iterate by tuples: each tuple starts with the index label, followed by the
# row values, so star-unpacking separates the two.
for index, *values in df.itertuples():
    print(index)
    print(values[0])
    print(values[1])
    print(values[2])
sort
#index sort
import pandas as pd
import numpy as np
# Build a frame whose index labels are deliberately shuffled, then order the
# rows by index label.
unsorted_df = pd.DataFrame(
    np.random.randn(10, 2),
    index=[1, 4, 6, 2, 3, 5, 9, 8, 0, 7],
    columns=['col2', 'col1'],
)
sorted_df = unsorted_df.sort_index()  # result is a new frame; unsorted_df keeps its order
print(sorted_df)
#values sort
import pandas as pd
import numpy as np
unsorted_df = pd.DataFrame({'col1': [2, 1, 1, 1], 'col2': [1, 3, 2, 4]})
# Sort by col1 first; rows that tie on col1 are then ordered by col2.
sorted_df = unsorted_df.sort_values(by=['col1', 'col2'])
print(sorted_df)
edit data frame
# Rewrite column B twice: first replace every value with its square, then
# replace every square with a [square root, half] pair.
df = pd.DataFrame({'A': [1, 2, 3, 4, 5], "B": [1, 2, 3, 4, 5]})
squares = df['B'].apply(lambda b: math.pow(b, 2))
df['B'] = squares
df["B"] = [[math.sqrt(sq), sq / 2] for sq in squares]
merge
left − A DataFrame object.
right − Another DataFrame object.
on − Columns (names) to join on. Must be found in both the left and right DataFrame objects.
left_on − Columns from the left DataFrame to use as keys. Can either be column names or arrays with length equal to the length of the DataFrame.
right_on − Columns from the right DataFrame to use as keys. Can either be column names or arrays with length equal to the length of the DataFrame.
left_index − If True, use the index (row labels) from the left DataFrame as its join key(s). In case of a DataFrame with a MultiIndex (hierarchical), the number of levels must match the number of join keys from the right DataFrame.
right_index − Same usage as left_index for the right DataFrame.
how − One of ‘left’, ‘right’, ‘outer’, ‘inner’. Defaults to inner. Each method has been described below.
sort − Sort the result DataFrame by the join keys in lexicographical order. Defaults to False; leaving it False improves performance substantially in many cases.
import pandas as pd
# Two sample frames with the same column layout (id, Name, subject_id) to use
# as the left and right operands of a merge.
left = pd.DataFrame(dict(
    id=[1, 2, 3, 4, 5],
    Name=['Alex', 'Amy', 'Allen', 'Alice', 'Ayoung'],
    subject_id=['sub1', 'sub2', 'sub4', 'sub6', 'sub5'],
))
right = pd.DataFrame(dict(
    id=[1, 2, 3, 4, 5],
    Name=['Billy', 'Brian', 'Bran', 'Bryce', 'Betty'],
    subject_id=['sub2', 'sub4', 'sub3', 'sub6', 'sub5'],
))
class A:
    """Show what the subscript syntax ``obj[start:stop:step]`` hands to ``__getitem__``."""

    def __getitem__(self, item):
        """Print the start, stop and step of ``item`` when it is a slice.

        Any other kind of key is ignored. Nothing is returned in either case.
        """
        if isinstance(item, slice):
            print(item.start)
            print(item.stop)
            print(item.step)


a = A()
a[1:3:4]  # the key arrives as slice(1, 3, 4); prints 1, 3 and 4
generic class
from decimal import Decimal
from typing import Generic, TypeVar

# Constrained type variable: T must be exactly one of these numeric types.
T = TypeVar('T', int, float, complex, Decimal)


class Stack(Generic[T]):
    """A last-in, first-out stack whose items all share the type ``T``."""

    def __init__(self) -> None:
        # Create an empty list with items of type T
        self.items: list[T] = []

    def push(self, item: T) -> None:
        """Put ``item`` on top of the stack."""
        self.items.append(item)

    def pop(self) -> T:
        """Remove and return the top item.

        Raises:
            IndexError: if the stack is empty.
        """
        return self.items.pop()

    def empty(self) -> bool:
        """Return True when the stack holds no items."""
        return not self.items
predefined static properties
__dict__ − Dictionary containing the class’s namespace.
__doc__ − Class documentation string, or None if undefined.
__name__ − Class name.
__module__ − Module name in which the class is defined. This attribute is “__main__” in interactive mode.
__bases__ − A possibly empty tuple containing the base classes, in the order of their occurrence in the base class list.
Set: Its unique feature is that items are either members or not. This means duplicates are ignored:
Mutable set: The set collection
Immutable set: The frozenset collection
Sequence: Its unique feature is that items are provided with an index position:
Mutable sequence: The list collection
Immutable sequence: The tuple collection
Mapping: Its unique feature is that each item has a key that refers to a value:
Mutable mapping: The dict collection.
Immutable mapping: Interestingly, there’s no built-in frozen mapping.
[:]: The start and stop are implied. The expression S[:] will create a copy of sequence S.
[:stop]: This makes a new list from the beginning to just before the stop value.
[start:]: This makes a new list from the given start to the end of the sequence.
[start:stop]: This picks a sublist, starting from the start index and stopping just before the stop index. Python works with half-open intervals. The start is included, while the end is not included.
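[::step]: The start and stop are implied and include the entire sequence. The step—generally not equal to one—means we’ll skip through the list from the start using the step. For a given step, s, and a list of size |L|, the index values used are $0, s, 2s, 3s, \ldots$, stopping before $|L|$.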
[start::step]: The start is given, but the stop is implied. The idea is that the start is an offset, and the step applies to that offset. For a given start, a, step, s, and a list of size |L|, the index values used are $a, a+s, a+2s, a+3s, \ldots$, stopping before $|L|$.
[:stop:step]: This is used to prevent processing the last few items in a list. Since the step is given, processing begins with element zero.
[start:stop:step]: This will pick elements from a subset of the sequence. Items prior to start and at or after stop will not be used.
from decimal import Decimal
from fractions import Fraction
#math for real numbers, cmath for complex numbers
import math, cmath
#fractions
f=Fraction(0.54)
print(f.numerator, f.denominator)#607985949695017 1125899906842624 (the exact value of the binary float 0.54)
f=Fraction(0.54).limit_denominator(1000)
print(f.numerator, f.denominator)#27 50
f=Fraction("22/7")
print(f.numerator, f.denominator)#22 7
# converting to float
print(float(f))#3.142857142857143
# converting to int
print(int(f))#3 (truncates toward zero)
#complex numbers
a=67j#complex number complex(0,67)
##converting to polar form (only the modulus r is computed here; cmath.polar(a) gives (r, phi))
r=abs(a)#67.0 (abs() of a complex number is a float)
#decimal
d=Decimal("0.54")
print(d)#0.54
#rounding
print(round(0.54,1))#0.5
print(round(0.54,0))#1.0
#floor and ceil
print(math.floor(0.54))#0
print(math.ceil(0.54))#1
#operations
print(0.54+0.46)#1.0
print(0.54-0.46)#0.08000000000000002 (binary floats: not exactly 0.08)
print(0.54*0.46)#approximately 0.2484
print(0.54/0.46)#1.173913043478261
print(0.54//0.46)#1.0
print(round(0.54%0.46,3))#0.08
print(0.54**0.46)#approximately 0.7532
(19/155)*(155/19) #0.9999999999999999
round((19/155)*(155/19)) #1
(19/155)*(155/19) == 1.0#False
math.isclose((19/155)*(155/19), 1)#True
value= 0x110 #0b100010000 (decimal 272)
import random
rand_val = random.randint(1, 100)
print(random.randrange(0, 100, 5))# 0, 5, 10...95
print(random.randint(1, 100))# 1, 2, 3...100
string
In Python, single-quoted strings and double-quoted strings are the same. PEP 8 does not make a recommendation for this: pick a rule and stick to it. When a string contains single or double quote characters, however, use the other one to avoid backslashes in the string.
# single line
m: str = 'hello world'
# multiple lines: a triple-quoted literal keeps the embedded line break
m2: str = '''hello world
my name is python'''
m_digit: str = '124'
print(m_digit.isnumeric())  # True
print(m_digit.isalpha())  # False
regular expressions
import re

string: str = 'hello 12 hi 89. Howdy 34'

# findall with a single capture group returns that group's text for every match.
regex: re.Pattern[str] = re.compile(r'(\d+)')
result = regex.findall(string)  # ['12', '89', '34']

# search returns the first match only; group 0 is the whole match and
# group 1 is the first capture group.
regex2: re.Pattern[str] = re.compile(r'(\d+) hi')
result = regex2.search(string)
print(result.group(0), result.start(0), result.end(0))  # 12 hi 6 11
print(result.group(1), result.start(1), result.end(1))  # 12 6 8

string = 'hello 12 hi 89. Howdy 34'
result = re.sub(r'\d+', '', string)  # remove all digits -> 'hello  hi . Howdy '
def sum_n(n: int) -> int:
    """Return the sum 1 + 2 + ... + n; the sum is 0 when n is 0.

    Raises:
        ValueError: if n is negative.
    """
    if n < 0:
        # ValueError is a subclass of Exception, so handlers that catch
        # Exception keep working.
        raise ValueError("n must be a non-negative integer")
    return sum(range(1, n + 1))


# A negative argument is rejected, so the handler prints the error message.
try:
    total = sum_n(-1)
    print(total)
except ValueError as e:
    print(e)
from typing import Union
from decimal import Decimal
# Alias covering the numeric types that add() is annotated to accept.
number = Union[int, float, Decimal, complex]


def add(a: number, b: number) -> number:
    """Return ``a + b``.

    NOTE: the alias is wider than what ``+`` supports at runtime. Adding a
    Decimal to a float or a complex raises TypeError.
    """
    return a + b


add(14j, 5.5)  # (5.5+14j)
def dice_t(n: int, sides: int = 6) -> tuple[int, ...]:
    """Roll ``n`` dice and return the results as a tuple.

    Each result is a random integer from 1 to ``sides`` inclusive; ``n == 0``
    gives an empty tuple.
    """
    return tuple(random.randint(1, sides) for _ in range(n))
* is used as a prefix for a special parameter that receives all the unmatched positional arguments. We often use *args to collect all of the positional arguments into a single parameter named args.
** is used as a prefix for a special parameter that receives all the unmatched named arguments. We often use **kwargs to collect the named values into a parameter named kwargs.
*, when used by itself as a separator between parameters, separates the parameters that can be applied positionally or by keyword from the remaining parameters, which can only be provided by keyword.
import typing


def fibo_iter() -> typing.Iterator[typing.Tuple[int, int]]:
    """Yield consecutive Fibonacci pairs forever: (1, 1), (1, 2), (2, 3), ...

    The generator never finishes on its own; the consumer must stop iterating.
    """
    a, b = 1, 1
    while True:
        yield (a, b)
        a, b = b, a + b


# Stop once the first element of the pair reaches 10.
for i, f in fibo_iter():
    if i >= 10:
        break
    print(f, end=' ')  # 1 2 3 5 8 13
v:typing.Union[int,float] = 1.0#union for merge many types in one type
print(isinstance(v,typing.Union[float,set,dict])) #true
#object is super of all classes
m = 12
print(isinstance(m,object))#true
print(issubclass(int,object))#true
callable:typing.Callable= lambda m: m+12
print(callable(12),isinstance(callable,typing.Callable))#24 true
Vector: TypeAlias = list[float]
UserId = NewType('UserId', int)
some_id = UserId(524313)