I have been trying for some time to speed up the processing of a list of tickers using Google Cloud Shell and Python code provided by Gemini and ChatGPT. So I have a list of tickers like this:
TB,FMC,FNV,FOOD.L,FOX,FOXA,FRT,FSLR,FTI,FTNT,FTV,FXI,GBPAUD=X,GBPCAD=X,GBPNZD=X,GBPUSD=X,GD,GDXJ.DE,GE,GILD,GIS,GL,GLW,GM,GOLD,GOOG,GOOGL,GPC,GPN,GPS,GRMN,GS,GWW,HAL,HAS,HBAN,HBI,HCA,HD,HES,HIG,HII,HLT,HOG,HOLX,HON,HOOD,HPE,HPQ,HRB,HRL,HSIC,HST,HSY,HUM,HWM,IB01.L,IBCI.DE,IBKR,IBM,ICE,IDTL.L,IDXX,IEX,IFF,IFX.DE,IGLN.L,IGLO.L,IH2O.L,ILMN,INCY,INR.PA,INTC,INTU,IP,IPG,IPGP,IPRE.DE,IQV,IR,IRM,ISLN.L,ISRG,IT,ITA,ITW,IVZ,J,JBHT,JCI,JETS,JKHY,JNJ,JNPR,JPM,JWN,K,KEY,KEYS,KHC,KIM,KLAC,KMB,KMI,KMX,KO,KODK,KR,L,LDOS,LEA,LEG,LEN,LH,LHA.DE,LHX,LIN,LKQ,LLY,LMT,LNC,LNT,LOCK,LOW,LRCX,LUV,LVS,LW,LYB,LYV,M,MA,MAA,MAR,MAS,MBG.DE,MCD,MCHP,MCK,MCO,MDLZ,MDT,MET,METV,MMM,MRNA,MRO,MS,MSCI,MU,NCLH,NDAQ,NEE,NGAS.L,NICE,NIO,NVD.DE,NVO,NZDUSD=X,O,OD7F.DE,OPRA,OXY,PAH3.DE,PBR,PDD,PFE,PINS,PLD,PLTR,PRAR.DE,PSA,PSTG,PUM.DE,RACE,RBLX,RGLD,RIVN,RWE.DE,RY,RYAAY,SAP.DE,SCHB,SEA,SHOP,SHl.DE,SIE.DE,SMSN.L,SNY,SOL-USD,SONY,SOXX,SPG,SPOT,STM,TDOC,TGT,TLK,TLT,TM,TME,TSCO,TSLA,TSM,TTE,TTE.PA,TUR.PA,TWLO,TXN,UBER,UL,URA,USDCAD=X,USDILS=X,USDJPY=X,USDMXN=X,USDNOK=X,USDPLN=X,USDSEK=X,UST.PA,VIV,VLO,VOD,VOW3.DE,VTOP.DE,VX1.DE,WCLD.L,WHR,WU,X,XAD6.DE,XCS4.DE,XLE,XMLD.DE,XRP-USD,XRX,XX2D.DE,Z,ZM,ZPRP.DE,ZPRV.DE
This is the code that reads data for these tickers (all code was written by GPT, I have no coding skills... just ideas):
import os
import yfinance as yf
import pandas as pd

def fetch_data(ticker):
    # Fetch data for the last 5 years with weekly interval
    weekly_data = yf.Ticker(ticker).history(period="5y", interval="1wk")
    return weekly_data

def save_data(data, ticker, folder):
    # Select only Date, Open, High, Low, Close columns
    data = data[['Open', 'High', 'Low', 'Close']].reset_index()
    # Format OHLC data to 2 decimals
    data['Open'] = data['Open'].round(2)
    data['High'] = data['High'].round(2)
    data['Low'] = data['Low'].round(2)
    data['Close'] = data['Close'].round(2)
    filename = os.path.join(folder, f"{ticker}.csv")
    data.to_csv(filename, index=False)

def update_data(tickers_file, data_folder):
    # Load tickers from file
    with open(tickers_file, 'r') as f:
        tickers = f.read().strip().split(',')
    # Check if data folder exists, create if not
    if not os.path.exists(data_folder):
        os.makedirs(data_folder)
    tickers_with_errors = []
    for ticker in tickers:
        try:
            # Fetch data for each ticker
            data = fetch_data(ticker)
            if len(data) > 0:
                # Save data for each ticker
                save_data(data, ticker, data_folder)
                # Calculate the previous Friday (note: previous_friday is currently unused)
                last_date = pd.to_datetime(data.index.max())
                previous_friday = last_date - pd.DateOffset(days=last_date.weekday() - 4)  # 4: Friday
                print(f"Data saved for {ticker}")
                # Delete last line if it's NOT a Friday, Saturday, or Sunday
                filename = os.path.join(data_folder, f"{ticker}.csv")
                df = pd.read_csv(filename)
                last_date = pd.to_datetime(df.iloc[-1]['Date'])
                if last_date.weekday() not in [4, 5, 6]:  # 4: Friday, 5: Saturday, 6: Sunday
                    df = df[:-1]  # Remove the last row
                    df.to_csv(filename, index=False)
                    print(f"Last line of {ticker}.csv deleted (week incomplete).")
            else:
                print(f"No data available for {ticker}.")
        except Exception as e:
            print(f"Error fetching data for {ticker}: {str(e)}")
            tickers_with_errors.append(ticker)
    return tickers_with_errors

if __name__ == "__main__":
    tickers_file = 'tickers.txt'
    data_folder = 'data'
    tickers_with_errors = update_data(tickers_file, data_folder)
    if tickers_with_errors:
        print("\nTickers with errors:")
        print(tickers_with_errors)
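One thing I understood from the yfinance docs, since speed is my main goal: there is also a batch downloader, yf.download(), which fetches many tickers in a single threaded call instead of looping over Ticker().history() one by one. A minimal sketch of that idea (the short ticker list is just a placeholder, not my real list):

import os
import yfinance as yf

# Sketch: fetch all tickers in one threaded batch call instead of one
# HTTP round trip per ticker. Replace the placeholder list as needed.
tickers = ["AAPL", "MSFT", "GOOG"]
data = yf.download(tickers, period="5y", interval="1wk",
                   group_by="ticker", threads=True)

os.makedirs("data", exist_ok=True)
for t in tickers:
    # With group_by="ticker", data[t] holds that ticker's OHLCV columns
    df = data[t][["Open", "High", "Low", "Close"]].dropna().round(2)
    df.reset_index().to_csv(os.path.join("data", f"{t}.csv"), index=False)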
And this is the code that processes the OHLC data:
import pandas as pd
import matplotlib.pyplot as plt
import mplfinance as mpf
import argparse
import os
import datetime

def load_data(csv_file):
    df = pd.read_csv(csv_file)
    df['Date'] = pd.to_datetime(df['Date'], utc=True)  # Ensure dates are parsed as UTC
    df['Date'] = df['Date'].dt.tz_localize(None)  # Remove timezone information
    df.set_index('Date', inplace=True)
    return df

def aggregate_data(df, timeframe):
    resampling_rules = {
        'W': 'W',
        '2W': '2W',
        '3W': '3W',
        '5W': '5W',
        '7W': '7W',
        '9W': '9W',
        '12W': '12W',
        '15W': '15W',
        '20W': '20W',
        '25W': '25W',
        '30W': '30W'
    }
    if timeframe not in resampling_rules:
        raise ValueError(f"Unsupported timeframe: {timeframe}")
    rule = resampling_rules[timeframe]
    resampled_df = df.resample(rule).agg({
        'Open': 'first',
        'High': 'max',
        'Low': 'min',
        'Close': 'last'
    }).dropna()
    return resampled_df

def check_inside_bar(df, timeframe, ticker):
    aggregated_df = aggregate_data(df, timeframe)
    latest_bars = aggregated_df.iloc[-2:]
    if len(latest_bars) < 2:
        print(f"Not enough data to determine inside bar pattern for {ticker}.")
        return None, None
    latest_bar = latest_bars.iloc[-1]
    previous_bar = latest_bars.iloc[-2]
    # Check if latest bar is inside the previous bar (mother bar)
    if (latest_bar['High'] <= previous_bar['High']) and (latest_bar['Low'] >= previous_bar['Low']):
        # Check if the body of the latest bar is greater than the body of the previous bar
        latest_bar_body = abs(latest_bar['Close'] - latest_bar['Open'])
        previous_bar_body = abs(previous_bar['Close'] - previous_bar['Open'])
        if latest_bar_body > previous_bar_body and latest_bar['Close'] > latest_bar['Open']:
            latest_bar_end_date = latest_bar.name.to_pydatetime().date()
            current_date = datetime.datetime.now().date()
            print(f"PATTERN found for {ticker}.")
            print(f"Next Time Frame will end on: {latest_bar_end_date}")
            if latest_bar_end_date > current_date:
                print(f"Skipping {ticker} as the latest bar end date is in the future: {latest_bar_end_date}")
                return None, None
            return aggregated_df, latest_bar
    return None, None

def save_data_to_text(df, filename):
    with open(filename, 'w') as f:
        for idx, row in df.iterrows():
            line = f"{idx.date()}: {row['Open']:.2f}:{row['High']:.2f}:{row['Low']:.2f}:{row['Close']:.2f}\n"
            f.write(line)

def plot_candlestick_chart(df, output_filename, data_filename):
    df = df.iloc[-10:]  # Select the last 10 bars for plotting
    mpf.plot(df, type='candle', style='charles', title='Candlestick Chart', savefig=output_filename)
    save_data_to_text(df, data_filename)
    print(f"Candlestick chart saved as {output_filename}")
    print(f"Data saved as {data_filename}")

def main():
    parser = argparse.ArgumentParser(description='Process stock data.')
    parser.add_argument('timeframe', type=str, help='Timeframe for aggregation (e.g., W, 2W, 3W, etc.)')
    args = parser.parse_args()
    data_folder = 'data'
    output_folder = 'aggregated_data'
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    csv_files = [f for f in os.listdir(data_folder) if f.endswith('.csv')]
    for csv_file in csv_files:
        df = load_data(os.path.join(data_folder, csv_file))
        ticker = csv_file.split('.')[0]
        aggregated_df, latest_bar = check_inside_bar(df, args.timeframe, ticker)
        if aggregated_df is not None:
            timeframe_folder = os.path.join(output_folder, args.timeframe)
            if not os.path.exists(timeframe_folder):
                os.makedirs(timeframe_folder)
            output_filename = os.path.join(timeframe_folder, f"{ticker}.png")
            data_filename = os.path.join(timeframe_folder, f"{ticker}_data.txt")
            plot_candlestick_chart(aggregated_df, output_filename, data_filename)

if __name__ == "__main__":
    main()
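One thing I noticed about the dates, which may be relevant to the problem below: pandas labels every 'W'-based resample bin by its right edge, a Sunday by default, not by the last data point inside the bin. A tiny self-contained example with made-up daily data shows this:

import pandas as pd

# Made-up data: Mon 2024-07-01 .. Fri 2024-07-05
idx = pd.date_range("2024-07-01", "2024-07-05", freq="D")
df = pd.DataFrame({"Close": [1.0, 2.0, 3.0, 4.0, 5.0]}, index=idx)

# The single weekly bar is labelled 2024-07-07 (the Sunday that closes
# the bin), even though the last data point is Friday 2024-07-05.
print(df.resample("W").agg({"Close": "last"}))

I think this is why EURAUD=X prints 2024-07-07 in the run below while the other tickers print 2024-07-14.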
For example, for this run: sharpvinvest@cloudshell:~ (sharpinvest-428908)$ python insideBar.py W
PATTERN found for EURAUD=X.
Next Time Frame will end on: 2024-07-07
Candlestick chart saved as aggregated_data/W/EURAUD=X.png
Data saved as aggregated_data/W/EURAUD=X_data.txt
PATTERN found for NIO.
Next Time Frame will end on: 2024-07-14
Candlestick chart saved as aggregated_data/W/NIO.png
Data saved as aggregated_data/W/NIO_data.txt
PATTERN found for BSX.
Next Time Frame will end on: 2024-07-14
Candlestick chart saved as aggregated_data/W/BSX.png
Data saved as aggregated_data/W/BSX_data.txt
PATTERN found for SOL-USD.
Next Time Frame will end on: 2024-07-14
Candlestick chart saved as aggregated_data/W/SOL-USD.png
Data saved as aggregated_data/W/SOL-USD_data.txt
PATTERN found for MU.
Next Time Frame will end on: 2024-07-14
Candlestick chart saved as aggregated_data/W/MU.png
Data saved as aggregated_data/W/MU_data.txt
sharpvinvest@cloudshell:~ (sharpinvest-428908)$
It does not correctly calculate the end date of the custom time frame. Any attempt to improve the code using AI failed. I only got some sort of result like this:
https://chatgpt.com/share/62d6f456-c...5-3a999e2acf27
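For the end-date calculation itself, the only reliable rule I could extract from the pandas behaviour described above is: for an 'nW' rule, the bar labelled L covers the n-week window ending on L, so the window can be reconstructed from the label alone. A minimal sketch (the helper name is hypothetical, not part of my scripts):

import pandas as pd

def bar_window(label: pd.Timestamp, n_weeks: int):
    # Hypothetical helper: for an 'nW' resample rule, the bar labelled
    # `label` (a Sunday by default) covers the n-week window that ends
    # on that Sunday.
    start = label - pd.Timedelta(weeks=n_weeks) + pd.Timedelta(days=1)
    return start.date(), label.date()

# Example: a 3W bar labelled Sunday 2024-07-14 spans Mon 2024-06-24
# through Sun 2024-07-14.
print(bar_window(pd.Timestamp("2024-07-14"), 3))

The catch, as far as I can tell, is that for n > 1 the particular Sundays pandas picks as bin edges depend on each ticker's first data point, so two tickers can land on different n-week grids.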