# Find connections between out links and the initial webpages, to add to
# edges.scala.
from avro import schema, datafile, io
import re

# Default input/output locations; override them through the parameters of
# read_avro_file() when running against other data.
AVRO_PATH = "/Users/vcedeno/Desktop/webpages/social_00000_v2"
VERTEX_PATH = "/Users/vcedeno/Desktop/webpages/vertexID.scala"
EDGES_PATH = "/Users/vcedeno/Desktop/webpages/edgesID.scala"

# Matches the run of digits at the very end of a doc_id.
_TRAILING_DIGITS = re.compile(r'\d+$')


def _load_vertex_index(vertex_path):
    """Return a dict mapping each URL in *vertex_path* to its vertex ids.

    Every non-blank line must have the form ``"<id> <url>"`` (exactly one
    space). Ids are kept as the strings found in the file, and each URL maps
    to its ids in file order, so duplicate URLs yield one entry per line.

    Raises:
        ValueError: if a non-blank line does not split into exactly two
            space-separated fields.
    """
    index = {}
    with open(vertex_path, 'r') as vertex_file:
        for line in vertex_file:
            # Drop the line terminator; otherwise the URL would carry a
            # trailing "\n" and never compare equal to a record's url.
            line = line.rstrip("\r\n")
            if not line:
                continue
            vertex_id, url = line.split(" ")
            index.setdefault(url, []).append(vertex_id)
    return index


def read_avro_file(avro_path=AVRO_PATH, vertex_path=VERTEX_PATH,
                   edges_path=EDGES_PATH):
    """Write an ``Edge(src,dst,200)`` entry for every record/vertex URL match.

    For each record in the Avro container at *avro_path*, the source id is
    the integer formed by the trailing digits of the record's ``doc_id``.
    Each vertex in *vertex_path* whose URL equals the record's ``url``
    produces one ``Edge(<src>L,<dst>L,200),`` entry in *edges_path*. Entries
    are written back to back with no separator other than the trailing comma.

    Args:
        avro_path: Avro container file holding ``doc_id`` and ``url`` fields.
        vertex_path: text file with one ``"<id> <url>"`` pair per line.
        edges_path: output file; it is truncated and rewritten.

    Raises:
        ValueError: if a ``doc_id`` does not end in digits, or if a vertex
            line is malformed.
    """
    # Read the vertex list once up front instead of rescanning the file for
    # every Avro record.
    vertex_index = _load_vertex_index(vertex_path)

    rec_reader = io.DatumReader()
    # Avro containers are binary, so open in "rb". The with-blocks guarantee
    # both files are closed (and the output flushed) even on error.
    with open(avro_path, 'rb') as avro_file, \
            open(edges_path, 'w') as edges_out:
        df_reader = datafile.DataFileReader(avro_file, rec_reader)
        for record in df_reader:
            match = _TRAILING_DIGITS.search(record['doc_id'])
            if match is None:
                raise ValueError(
                    "doc_id has no trailing digits: %r" % (record['doc_id'],))
            source_id = int(match.group())
            # Normalise the same way as the vertex URLs so both sides of the
            # comparison agree on trailing line terminators.
            url = record['url'].rstrip("\r\n")
            for vertex_id in vertex_index.get(url, ()):
                edges_out.write(
                    "Edge(" + str(source_id) + "L," + str(vertex_id)
                    + "L,200),")


if __name__ == '__main__':
    read_avro_file()